468 files changed, 19472 insertions, 7440 deletions
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index 3351bb5..6857c4c 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -392,7 +392,6 @@ void DataReader::readProfile(BinaryFunction &BF) {
     }
   }
 
-  uint64_t MismatchedBranches = 0;
   for (const BranchInfo &BI : FBD->Data) {
     if (BI.From.Name != BI.To.Name)
       continue;
@@ -401,7 +400,6 @@ void DataReader::readProfile(BinaryFunction &BF) {
                       BI.Mispreds)) {
       LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
                         << BI.To.Offset << '\n');
-      ++MismatchedBranches;
     }
   }
 
diff --git a/clang-tools-extra/clangd/InlayHints.cpp b/clang-tools-extra/clangd/InlayHints.cpp
index bd124aa..c5876ac 100644
--- a/clang-tools-extra/clangd/InlayHints.cpp
+++ b/clang-tools-extra/clangd/InlayHints.cpp
@@ -10,6 +10,7 @@
 #include "Config.h"
 #include "HeuristicResolver.h"
 #include "ParsedAST.h"
+#include "clang/AST/Decl.h"
 #include "clang/AST/DeclarationName.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
@@ -392,6 +393,7 @@ private:
     // Don't show hints for variadic parameters.
     size_t FixedParamCount = getFixedParamCount(Callee);
     size_t ArgCount = std::min(FixedParamCount, Args.size());
+    auto Params = Callee->parameters();
 
     NameVec ParameterNames = chooseParameterNames(Callee, ArgCount);
 
@@ -402,12 +404,14 @@ private:
 
     for (size_t I = 0; I < ArgCount; ++I) {
       StringRef Name = ParameterNames[I];
-      if (!shouldHint(Args[I], Name))
-        continue;
+      bool NameHint = shouldHintName(Args[I], Name);
+      bool ReferenceHint = shouldHintReference(Params[I]);
 
-      addInlayHint(Args[I]->getSourceRange(), HintSide::Left,
-                   InlayHintKind::ParameterHint, /*Prefix=*/"", Name,
-                   /*Suffix=*/": ");
+      if (NameHint || ReferenceHint) {
+        addInlayHint(Args[I]->getSourceRange(), HintSide::Left,
+                     InlayHintKind::ParameterHint, ReferenceHint ? "&" : "",
+                     NameHint ? Name : "", ": ");
+      }
     }
   }
 
@@ -434,12 +438,12 @@ private:
     return WhatItIsSetting.equals_insensitive(ParamNames[0]);
   }
 
-  bool shouldHint(const Expr *Arg, StringRef ParamName) {
+  bool shouldHintName(const Expr *Arg, StringRef ParamName) {
     if (ParamName.empty())
       return false;
 
     // If the argument expression is a single name and it matches the
-    // parameter name exactly, omit the hint.
+    // parameter name exactly, omit the name hint.
     if (ParamName == getSpelledIdentifier(Arg))
       return false;
 
@@ -450,6 +454,13 @@ private:
     return true;
   }
 
+  bool shouldHintReference(const ParmVarDecl *Param) {
+    // If the parameter is a non-const reference type, print an inlay hint
+    auto Type = Param->getType();
+    return Type->isLValueReferenceType() &&
+           !Type.getNonReferenceType().isConstQualified();
+  }
+
   // Checks if "E" is spelled in the main file and preceded by a C-style comment
   // whose contents match ParamName (allowing for whitespace and an optional "="
   // at the end.
@@ -563,7 +574,7 @@ private:
     return Result;
   }
 
-  // We pass HintSide rather than SourceLocation because we want to ensure 
+  // We pass HintSide rather than SourceLocation because we want to ensure
   // it is in the same file as the common file range.
   void addInlayHint(SourceRange R, HintSide Side, InlayHintKind Kind,
                     llvm::StringRef Prefix, llvm::StringRef Label,
diff --git a/clang-tools-extra/clangd/unittests/InlayHintTests.cpp b/clang-tools-extra/clangd/unittests/InlayHintTests.cpp
index b0f8b25..1b39ebb 100644
--- a/clang-tools-extra/clangd/unittests/InlayHintTests.cpp
+++ b/clang-tools-extra/clangd/unittests/InlayHintTests.cpp
@@ -138,6 +138,38 @@ TEST(ParameterHints, NoName) {
   )cpp");
 }
 
+TEST(ParameterHints, NoNameConstReference) {
+  // No hint for anonymous const l-value ref parameter.
+  assertParameterHints(R"cpp(
+    void foo(const int&);
+    void bar() {
+      foo(42);
+    }
+  )cpp");
+}
+
+TEST(ParameterHints, NoNameReference) {
+  // Reference hint for anonymous l-value ref parameter.
+  assertParameterHints(R"cpp(
+    void foo(int&);
+    void bar() {
+      int i;
+      foo($param[[i]]);
+    }
+  )cpp",
+                       ExpectedHint{"&: ", "param"});
+}
+
+TEST(ParameterHints, NoNameRValueReference) {
+  // No reference hint for anonymous r-value ref parameter.
+  assertParameterHints(R"cpp(
+    void foo(int&&);
+    void bar() {
+      foo(42);
+    }
+  )cpp");
+}
+
 TEST(ParameterHints, NameInDefinition) {
   // Parameter name picked up from definition if necessary.
   assertParameterHints(R"cpp(
@@ -162,6 +194,66 @@ TEST(ParameterHints, NameMismatch) {
                        ExpectedHint{"good: ", "good"});
 }
 
+TEST(ParameterHints, NameConstReference) {
+  // Only name hint for const l-value ref parameter.
+  assertParameterHints(R"cpp(
+    void foo(const int& param);
+    void bar() {
+      foo($param[[42]]);
+    }
+  )cpp",
+                       ExpectedHint{"param: ", "param"});
+}
+
+TEST(ParameterHints, NameTypeAliasConstReference) {
+  // Only name hint for const l-value ref parameter via type alias.
+  assertParameterHints(R"cpp(
+    using alias = const int&;
+    void foo(alias param);
+    void bar() {
+      int i;
+      foo($param[[i]]);
+    }
+  )cpp",
+                       ExpectedHint{"param: ", "param"});
+}
+
+TEST(ParameterHints, NameReference) {
+  // Reference and name hint for l-value ref parameter.
+  assertParameterHints(R"cpp(
+    void foo(int& param);
+    void bar() {
+      int i;
+      foo($param[[i]]);
+    }
+  )cpp",
+                       ExpectedHint{"&param: ", "param"});
+}
+
+TEST(ParameterHints, NameTypeAliasReference) {
+  // Reference and name hint for l-value ref parameter via type alias.
+  assertParameterHints(R"cpp(
+    using alias = int&;
+    void foo(alias param);
+    void bar() {
+      int i;
+      foo($param[[i]]);
+    }
+  )cpp",
+                       ExpectedHint{"&param: ", "param"});
+}
+
+TEST(ParameterHints, NameRValueReference) {
+  // Only name hint for r-value ref parameter.
+  assertParameterHints(R"cpp(
+    void foo(int&& param);
+    void bar() {
+      foo($param[[42]]);
+    }
+  )cpp",
+                       ExpectedHint{"param: ", "param"});
+}
+
 TEST(ParameterHints, Operator) {
   // No hint for operator call with operator syntax.
   assertParameterHints(R"cpp(
@@ -301,6 +393,21 @@ TEST(ParameterHints, ArgMatchesParam) {
                        ExpectedHint{"param: ", "param"});
 }
 
+TEST(ParameterHints, ArgMatchesParamReference) {
+  assertParameterHints(R"cpp(
+    void foo(int& param);
+    void foo2(const int& param);
+    void bar() {
+      int param;
+      // show reference hint on mutable reference
+      foo($param[[param]]);
+      // but not on const reference
+      foo2(param);
+    }
+  )cpp",
+                       ExpectedHint{"&: ", "param"});
+}
+
 TEST(ParameterHints, LeadingUnderscore) {
   assertParameterHints(R"cpp(
     void foo(int p1, int _p2, int __p3);
diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp
index 6948422..073a651 100644
--- a/clang-tools-extra/pseudo/lib/Forest.cpp
+++ b/clang-tools-extra/pseudo/lib/Forest.cpp
@@ -74,16 +74,13 @@ std::string ForestNode::dumpRecursive(const Grammar &G,
         } else if (P->kind() == Sequence) {
           Children = P->elements();
           if (Abbreviated) {
-            if (P->startTokenIndex() == End)
-              return;
-            for (size_t I = 0; I < Children.size(); ++I)
-              if (Children[I]->startTokenIndex() == P->startTokenIndex() &&
-                  EndOfElement(I) == End) {
-                return Dump(
-                    Children[I], End,
-                    /*ElidedParent=*/ElidedParent.getValueOr(P->symbol()),
-                    LineDec);
-              }
+            if (Children.size() == 1) {
+              assert(Children[0]->startTokenIndex() == P->startTokenIndex() &&
+                     EndOfElement(0) == End);
+              return Dump(Children[0], End,
+                          /*ElidedParent=*/ElidedParent.getValueOr(P->symbol()),
+                          LineDec);
+            }
           }
         }
 
diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst
index 384b092..d8fa4b3 100644
--- a/clang/docs/ClangCommandLineReference.rst
+++ b/clang/docs/ClangCommandLineReference.rst
@@ -3575,6 +3575,8 @@ PowerPC
 
 .. option:: -mcrbits, -mno-crbits
 
+Control the CR-bit tracking feature on PowerPC. ``-mcrbits`` (the enablement of CR-bit tracking support) is the default for POWER8 and above, as well as for all other CPUs when optimization is applied (-O2 and above).
+
 .. option:: -mcrypto, -mno-crypto
 
 .. option:: -mdirect-move, -mno-direct-move
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 91944e0..ca8d9a4 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -265,6 +265,10 @@ C++ Language Changes in Clang
   ``std::move_if_noexcept``, ``std::addressof``, and ``std::as_const``. These
   are now treated as compiler builtins and implemented directly, rather than
   instantiating the definition from the standard library.
+- Fixed mangling of nested dependent names such as ``T::a::b``, where ``T`` is a
+  template parameter, to conform to the Itanium C++ ABI and be compatible with
+  GCC. This breaks binary compatibility with code compiled with earlier versions
+  of clang; use the ``-fclang-abi-compat=14`` option to get the old mangling.
 
 C++20 Feature Support
 ^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 2dc3244..8eb6766 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -1219,23 +1219,22 @@ for generating PCH files:
 Using a PCH File
 ^^^^^^^^^^^^^^^^
 
-A PCH file can then be used as a prefix header when a :option:`-include`
+A PCH file can then be used as a prefix header when a ``-include-pch``
 option is passed to ``clang``:
 
 .. code-block:: console
 
-  $ clang -include test.h test.c -o test
+  $ clang -include-pch test.h.pch test.c -o test
 
-The ``clang`` driver will first check if a PCH file for ``test.h`` is
+The ``clang`` driver will check if the PCH file ``test.h.pch`` is
 available; if so, the contents of ``test.h`` (and the files it includes)
-will be processed from the PCH file. Otherwise, Clang falls back to
-directly processing the content of ``test.h``. This mirrors the behavior
-of GCC.
+will be processed from the PCH file. Otherwise, Clang will report an error.
 
 .. note::
 
   Clang does *not* automatically use PCH files for headers that are directly
-  included within a source file. For example:
+  included within a source file or indirectly via :option:`-include`.
+  For example:
 
   .. code-block:: console
 
@@ -1246,7 +1245,7 @@ of GCC.
 
   In this example, ``clang`` will not automatically use the PCH file for
   ``test.h`` since ``test.h`` was included directly in the source file and not
-  specified on the command line using :option:`-include`.
+  specified on the command line using ``-include-pch``.
 
 Relocatable PCH Files
 ^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 2f5336a..4b0b53c 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -2268,6 +2268,25 @@ Finds calls to the ``putenv`` function which pass a pointer to an automatic vari
     return putenv(env); // putenv function should not be called with auto variables
   }
 
+Limitations:
+
+   - Technically, one can pass automatic variables to ``putenv``,
+     but one needs to ensure that the given environment key stays
+     alive until it's removed or overwritten.
+     Since the analyzer cannot keep track of which envvars get overwritten
+     and when, it needs to be slightly more aggressive and warn for such
+     cases too, leading in some cases to false-positive reports like this:
+
+     .. code-block:: c
+
+        void baz() {
+          char env[] = "NAME=value";
+          putenv(env); // false-positive warning: putenv function should not be called...
+          // More code...
+          putenv((char *)"NAME=anothervalue");
+          // This putenv call overwrites the previous entry, thus that can no longer dangle.
+        } // 'env' array becomes dead only here.
+
 alpha.security.cert.env
 ^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index c22957e..e86475b 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -663,6 +663,7 @@ BUILTIN(__builtin_reduce_min, "v.", "nct")
 BUILTIN(__builtin_reduce_xor, "v.", "nct")
 BUILTIN(__builtin_reduce_or, "v.", "nct")
 BUILTIN(__builtin_reduce_and, "v.", "nct")
+BUILTIN(__builtin_reduce_add, "v.", "nct")
 
 BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
 BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 449acfb..6de7e6f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1990,8 +1990,6 @@ TARGET_BUILTIN(__builtin_ia32_selectss_128, "V4fUcV4fV4f", "ncV:128:", "avx512f"
 TARGET_BUILTIN(__builtin_ia32_selectsd_128, "V2dUcV2dV2d", "ncV:128:", "avx512f")
 
 // generic reduction intrinsics
-TARGET_BUILTIN(__builtin_ia32_reduce_add_d512, "iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_add_q512, "OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph512, "xxV32x", "ncV:512:", "avx512fp16")
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index eb4c7c4..83b8c06 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -218,6 +218,10 @@ public:
     /// This causes clang to not pack non-POD members of packed structs.
     Ver13,
 
+    /// Attempt to be ABI-compatible with code generated by Clang 14.0.x.
+    /// This causes clang to mangle dependent nested names incorrectly.
+    Ver14,
+
     /// Conform to the underlying platform's C and C++ ABIs as closely
     /// as we can.
     Latest
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 1147254..7d33b50 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11974,6 +11974,10 @@ public:
   QualType CheckVectorConditionalTypes(ExprResult &Cond, ExprResult &LHS,
                                        ExprResult &RHS,
                                        SourceLocation QuestionLoc);
+
+  QualType CheckSizelessVectorConditionalTypes(ExprResult &Cond,
+                                               ExprResult &LHS, ExprResult &RHS,
+                                               SourceLocation QuestionLoc);
   QualType FindCompositePointerType(SourceLocation Loc, Expr *&E1, Expr *&E2,
                                     bool ConvertArgs = true);
   QualType FindCompositePointerType(SourceLocation Loc,
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 75f0721..f72789d 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -443,6 +443,7 @@ public:
 private:
 
   bool mangleSubstitution(const NamedDecl *ND);
+  bool mangleSubstitution(NestedNameSpecifier *NNS);
   bool mangleSubstitution(QualType T);
   bool mangleSubstitution(TemplateName Template);
   bool mangleSubstitution(uintptr_t Ptr);
@@ -456,6 +457,11 @@ private:
 
     addSubstitution(reinterpret_cast<uintptr_t>(ND));
   }
+  void addSubstitution(NestedNameSpecifier *NNS) {
+    NNS = Context.getASTContext().getCanonicalNestedNameSpecifier(NNS);
+
+    addSubstitution(reinterpret_cast<uintptr_t>(NNS));
+  }
   void addSubstitution(QualType T);
   void addSubstitution(TemplateName Template);
   void addSubstitution(uintptr_t Ptr);
@@ -2036,12 +2042,21 @@ void CXXNameMangler::manglePrefix(NestedNameSpecifier *qualifier) {
     return;
 
   case NestedNameSpecifier::Identifier:
+    // Clang 14 and before did not consider this substitutable.
+    bool Clang14Compat = getASTContext().getLangOpts().getClangABICompat() <=
+                         LangOptions::ClangABI::Ver14;
+    if (!Clang14Compat && mangleSubstitution(qualifier))
+      return;
+
     // Member expressions can have these without prefixes, but that
     // should end up in mangleUnresolvedPrefix instead.
     assert(qualifier->getPrefix());
     manglePrefix(qualifier->getPrefix());
 
     mangleSourceName(qualifier->getAsIdentifier());
+
+    if (!Clang14Compat)
+      addSubstitution(qualifier);
     return;
   }
 
@@ -6009,6 +6024,14 @@ bool CXXNameMangler::mangleSubstitution(const NamedDecl *ND) {
   return mangleSubstitution(reinterpret_cast<uintptr_t>(ND));
 }
 
+bool CXXNameMangler::mangleSubstitution(NestedNameSpecifier *NNS) {
+  assert(NNS->getKind() == NestedNameSpecifier::Identifier &&
+         "mangleSubstitution(NestedNameSpecifier *) is only used for "
+         "identifier nested name specifiers.");
+  NNS = Context.getASTContext().getCanonicalNestedNameSpecifier(NNS);
+  return mangleSubstitution(reinterpret_cast<uintptr_t>(NNS));
+}
+
 /// Determine whether the given type has any qualifiers that are relevant for
 /// substitutions.
 static bool hasMangledSubstitutionQualifiers(QualType T) {
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index c80e4bc..53d43bf 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -6127,17 +6127,13 @@ Stmt *CFGBlock::getTerminatorCondition(bool StripParens) {
 // CFG Graphviz Visualization
 //===----------------------------------------------------------------------===//
 
-#ifndef NDEBUG
-static StmtPrinterHelper* GraphHelper;
-#endif
+static StmtPrinterHelper *GraphHelper;
 
 void CFG::viewCFG(const LangOptions &LO) const {
-#ifndef NDEBUG
   StmtPrinterHelper H(this, LO);
   GraphHelper = &H;
   llvm::ViewGraph(this,"CFG");
   GraphHelper = nullptr;
-#endif
 }
 
 namespace llvm {
@@ -6146,8 +6142,7 @@ template<>
 struct DOTGraphTraits<const CFG*> : public DefaultDOTGraphTraits {
   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
 
-  static std::string getNodeLabel(const CFGBlock *Node, const CFG* Graph) {
-#ifndef NDEBUG
+  static std::string getNodeLabel(const CFGBlock *Node, const CFG *Graph) {
     std::string OutSStr;
     llvm::raw_string_ostream Out(OutSStr);
     print_block(Out,Graph, *Node, *GraphHelper, false, false);
@@ -6163,9 +6158,6 @@ struct DOTGraphTraits<const CFG*> : public DefaultDOTGraphTraits {
       }
 
     return OutStr;
-#else
-    return {};
-#endif
   }
 };
 
diff --git a/clang/lib/Basic/Targets/AVR.cpp b/clang/lib/Basic/Targets/AVR.cpp
index 14b3d04..7be33d4 100644
--- a/clang/lib/Basic/Targets/AVR.cpp
+++ b/clang/lib/Basic/Targets/AVR.cpp
@@ -384,6 +384,9 @@ void AVRTargetInfo::getTargetDefines(const LangOptions &Opts,
     auto It = llvm::find_if(
         AVRMcus, [&](const MCUInfo &Info) { return Info.Name == this->CPU; });
 
+    if (It->IsTiny)
+      Builder.defineMacro("__AVR_TINY__", "1");
+
     if (It != std::end(AVRMcus)) {
       Builder.defineMacro(It->DefineName);
       if (It->NumFlashBanks >= 1)
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index e3ee68c..a2e50f3 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -36,6 +36,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAltivec = true;
     } else if (Feature == "+vsx") {
       HasVSX = true;
+    } else if (Feature == "+crbits") {
+      UseCRBits = true;
     } else if (Feature == "+bpermd") {
       HasBPERMD = true;
     } else if (Feature == "+extdiv") {
@@ -515,6 +517,11 @@ bool PPCTargetInfo::initFeatureMap(
                                 .Case("pwr9", true)
                                 .Case("pwr8", true)
                                 .Default(false);
+  Features["crbits"] = llvm::StringSwitch<bool>(CPU)
+                                .Case("ppc64le", true)
+                                .Case("pwr9", true)
+                                .Case("pwr8", true)
+                                .Default(false);
   Features["vsx"] = llvm::StringSwitch<bool>(CPU)
                         .Case("ppc64le", true)
                         .Case("pwr9", true)
@@ -649,6 +656,7 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
       .Case("powerpc", true)
       .Case("altivec", HasAltivec)
       .Case("vsx", HasVSX)
+      .Case("crbits", UseCRBits)
       .Case("power8-vector", HasP8Vector)
       .Case("crypto", HasP8Crypto)
       .Case("direct-move", HasDirectMove)
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 44489d0..8148762 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -62,6 +62,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
   bool HasROPProtect = false;
   bool HasPrivileged = false;
   bool HasVSX = false;
+  bool UseCRBits = false;
   bool HasP8Vector = false;
   bool HasP8Crypto = false;
   bool HasDirectMove = false;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 70fd869..caea5d1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3273,6 +3273,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
         *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
   }
 
+  case Builtin::BI__builtin_reduce_add:
+    return RValue::get(emitUnaryBuiltin(
+        *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
   case Builtin::BI__builtin_reduce_xor:
     return RValue::get(emitUnaryBuiltin(
         *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
@@ -14493,12 +14496,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
 
   // Reductions
-  case X86::BI__builtin_ia32_reduce_add_d512:
-  case X86::BI__builtin_ia32_reduce_add_q512: {
-    Function *F =
-        CGM.getIntrinsic(Intrinsic::vector_reduce_add, Ops[0]->getType());
-    return Builder.CreateCall(F, {Ops[0]});
-  }
   case X86::BI__builtin_ia32_reduce_fadd_pd512:
   case X86::BI__builtin_ia32_reduce_fadd_ps512:
   case X86::BI__builtin_ia32_reduce_fadd_ph512:
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index d3fe04d..2343c7e 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -4642,7 +4642,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
     return tmp5;
   }
 
-  if (condExpr->getType()->isVectorType()) {
+  if (condExpr->getType()->isVectorType() ||
+      condExpr->getType()->isVLSTBuiltinType()) {
     CGF.incrementProfileCounter(E);
 
     llvm::Value *CondV = CGF.EmitScalarExpr(condExpr);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index d520d25..1c35ac0 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5857,25 +5857,38 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
   // expression is simple and atomic is allowed for the given type for the
   // target platform.
-  if (BO == BO_Comma || !Update.isScalar() ||
-      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
+  if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
       (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
        (Update.getScalarVal()->getType() !=
         X.getAddress(CGF).getElementType())) ||
-      !X.getAddress(CGF).getElementType()->isIntegerTy() ||
       !Context.getTargetInfo().hasBuiltinAtomic(
           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
     return std::make_pair(false, RValue::get(nullptr));
 
+  auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
+    if (T->isIntegerTy())
+      return true;
+
+    if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
+      return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));
+
+    return false;
+  };
+
+  if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
+      !CheckAtomicSupport(X.getAddress(CGF).getElementType(), BO))
+    return std::make_pair(false, RValue::get(nullptr));
+
+  bool IsInteger = X.getAddress(CGF).getElementType()->isIntegerTy();
   llvm::AtomicRMWInst::BinOp RMWOp;
   switch (BO) {
   case BO_Add:
-    RMWOp = llvm::AtomicRMWInst::Add;
+    RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
     break;
   case BO_Sub:
     if (!IsXLHSInRHSPart)
       return std::make_pair(false, RValue::get(nullptr));
-    RMWOp = llvm::AtomicRMWInst::Sub;
+    RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
     break;
   case BO_And:
     RMWOp = llvm::AtomicRMWInst::And;
@@ -5933,9 +5946,13 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
   }
   llvm::Value *UpdateVal = Update.getScalarVal();
   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
-    UpdateVal = CGF.Builder.CreateIntCast(
-        IC, X.getAddress(CGF).getElementType(),
-        X.getType()->hasSignedIntegerRepresentation());
+    if (IsInteger)
+      UpdateVal = CGF.Builder.CreateIntCast(
+          IC, X.getAddress(CGF).getElementType(),
+          X.getType()->hasSignedIntegerRepresentation());
+    else
+      UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
+                                         X.getAddress(CGF).getElementType());
   }
   llvm::Value *Res =
       CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
@@ -6297,6 +6314,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
   case OMPC_memory_order:
   case OMPC_bind:
   case OMPC_align:
+  case OMPC_cancellation_construct_type:
     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
   }
 }
diff --git a/clang/lib/Driver/ToolChains/Ananas.cpp b/clang/lib/Driver/ToolChains/Ananas.cpp
index be1476a..40f9e56 100644
--- a/clang/lib/Driver/ToolChains/Ananas.cpp
+++ b/clang/lib/Driver/ToolChains/Ananas.cpp
@@ -85,7 +85,8 @@ void ananas::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     assert(Output.isNothing() && "Invalid output.");
   }
 
-  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
+  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles,
+                   options::OPT_r)) {
     if (!Args.hasArg(options::OPT_shared)) {
       CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
     }
@@ -111,12 +112,15 @@ void ananas::Linker::ConstructJob(Compilation &C, const JobAction &JA,
 
   AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
 
-  if (ToolChain.ShouldLinkCXXStdlib(Args))
-    ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
-  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs))
+  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs,
+                   options::OPT_r)) {
+    if (ToolChain.ShouldLinkCXXStdlib(Args))
+      ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
     CmdArgs.push_back("-lc");
+  }
 
-  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
+  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles,
+                   options::OPT_r)) {
     if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
       CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtendS.o")));
     else
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index d20cead..520f73ac 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -2048,6 +2048,11 @@ bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
   nextToken();
   if (FormatTok->is(tok::l_square))
     return false;
+  if (FormatTok->is(tok::r_square)) {
+    const FormatToken *Next = Tokens->peekNextToken();
+    if (Next && Next->is(tok::greater))
+      return false;
+  }
   parseSquare(/*LambdaIntroducer=*/true);
   return true;
 }
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index b35e3f4..847766a 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3518,6 +3518,8 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
     GenerateArg(Args, OPT_fclang_abi_compat_EQ, "12.0", SA);
   else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver13)
     GenerateArg(Args, OPT_fclang_abi_compat_EQ, "13.0", SA);
+  else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver14)
+    GenerateArg(Args, OPT_fclang_abi_compat_EQ, "14.0", SA);
 
   if (Opts.getSignReturnAddressScope() ==
       LangOptions::SignReturnAddressScopeKind::All)
@@ -4026,6 +4028,8 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
         Opts.setClangABICompat(LangOptions::ClangABI::Ver12);
       else if (Major <= 13)
         Opts.setClangABICompat(LangOptions::ClangABI::Ver13);
+      else if (Major <= 14)
+        Opts.setClangABICompat(LangOptions::ClangABI::Ver14);
     } else if (Ver != "latest") {
       Diags.Report(diag::err_drv_invalid_value)
           << A->getAsString(Args) << A->getValue();
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 592e7b0..847d366 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -1249,6 +1249,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
     case 52:
       Builder.defineMacro("_OPENMP", "202111");
       break;
+    case 50:
     default:
       // Default version is OpenMP 5.0
       Builder.defineMacro("_OPENMP", "201811");
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index d10493f..c7f3c96 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -9319,7 +9319,7 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
  */
 
 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
-  return __builtin_ia32_reduce_add_q512(__W);
+  return __builtin_reduce_add((__v8di)__W);
 }
 
 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
@@ -9337,7 +9337,7 @@ static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i
 static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
   __W = _mm512_maskz_mov_epi64(__M, __W);
-  return __builtin_ia32_reduce_add_q512(__W);
+  return __builtin_reduce_add((__v8di)__W);
 }
 
 static __inline__ long long __DEFAULT_FN_ATTRS512
@@ -9383,7 +9383,7 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
 
 static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_reduce_add_epi32(__m512i __W) {
-  return __builtin_ia32_reduce_add_d512((__v16si)__W);
+  return __builtin_reduce_add((__v16si)__W);
 }
 
 static __inline__ int __DEFAULT_FN_ATTRS512
@@ -9404,7 +9404,7 @@ _mm512_reduce_or_epi32(__m512i __W) {
 static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
   __W = _mm512_maskz_mov_epi32(__M, __W);
-  return __builtin_ia32_reduce_add_d512((__v16si)__W);
+  return __builtin_reduce_add((__v16si)__W);
 }
 
 static __inline__ int __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 467edd3..18387c2 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2331,6 +2331,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   }
 
   // These builtins support vectors of integers only.
+  // TODO: ADD should support floating-point types.
+  case Builtin::BI__builtin_reduce_add:
   case Builtin::BI__builtin_reduce_xor:
   case Builtin::BI__builtin_reduce_or:
   case Builtin::BI__builtin_reduce_and: {
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 5890bbc..f2b87c6 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -9632,11 +9632,15 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
     }
 
     if (isFriend) {
+      // In MSVC mode for older versions of the standard, friend function
+      // declarations behave as declarations
+      bool PerformFriendInjection =
+          getLangOpts().MSVCCompat && !getLangOpts().CPlusPlus20;
       if (FunctionTemplate) {
-        FunctionTemplate->setObjectOfFriendDecl();
+        FunctionTemplate->setObjectOfFriendDecl(PerformFriendInjection);
         FunctionTemplate->setAccess(AS_public);
       }
-      NewFD->setObjectOfFriendDecl();
+      NewFD->setObjectOfFriendDecl(PerformFriendInjection);
       NewFD->setAccess(AS_public);
     }
 
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 2af4a85..27ec863 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -23,6 +23,7 @@
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/Basic/AlignedAllocation.h"
+#include "clang/Basic/DiagnosticSema.h"
 #include "clang/Basic/PartialDiagnostic.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TypeTraits.h"
@@ -40,6 +41,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TypeSize.h"
 using namespace clang;
 using namespace sema;
 
@@ -6108,6 +6110,16 @@ static bool isValidVectorForConditionalCondition(ASTContext &Ctx,
   return EltTy->isIntegralType(Ctx);
 }
 
+static bool isValidSizelessVectorForConditionalCondition(ASTContext &Ctx,
+                                                         QualType CondTy) {
+  if (!CondTy->isVLSTBuiltinType())
+    return false;
+  const QualType EltTy =
+      cast<BuiltinType>(CondTy.getCanonicalType())->getSveEltType(Ctx);
+  assert(!EltTy->isEnumeralType() && "Vectors cant be enum types");
+  return EltTy->isIntegralType(Ctx);
+}
+
 QualType Sema::CheckVectorConditionalTypes(ExprResult &Cond, ExprResult &LHS,
                                            ExprResult &RHS,
                                            SourceLocation QuestionLoc) {
@@ -6199,6 +6211,89 @@ QualType Sema::CheckVectorConditionalTypes(ExprResult &Cond, ExprResult &LHS,
   return ResultType;
 }
 
+QualType Sema::CheckSizelessVectorConditionalTypes(ExprResult &Cond,
+                                                   ExprResult &LHS,
+                                                   ExprResult &RHS,
+                                                   SourceLocation QuestionLoc) {
+  LHS = DefaultFunctionArrayLvalueConversion(LHS.get());
+  RHS = DefaultFunctionArrayLvalueConversion(RHS.get());
+
+  QualType CondType = Cond.get()->getType();
+  const auto *CondBT = CondType->castAs<BuiltinType>();
+  QualType CondElementTy = CondBT->getSveEltType(Context);
+  llvm::ElementCount CondElementCount =
+      Context.getBuiltinVectorTypeInfo(CondBT).EC;
+
+  QualType LHSType = LHS.get()->getType();
+  const auto *LHSBT =
+      LHSType->isVLSTBuiltinType() ? LHSType->getAs<BuiltinType>() : nullptr;
+  QualType RHSType = RHS.get()->getType();
+  const auto *RHSBT =
+      RHSType->isVLSTBuiltinType() ? RHSType->getAs<BuiltinType>() : nullptr;
+
+  QualType ResultType;
+
+  if (LHSBT && RHSBT) {
+    // If both are sizeless vector types, they must be the same type.
+    if (!Context.hasSameType(LHSType, RHSType)) {
+      Diag(QuestionLoc, diag::err_conditional_vector_mismatched)
+          << LHSType << RHSType;
+      return QualType();
+    }
+    ResultType = LHSType;
+  } else if (LHSBT || RHSBT) {
+    ResultType = CheckSizelessVectorOperands(
+        LHS, RHS, QuestionLoc, /*IsCompAssign*/ false, ACK_Conditional);
+    if (ResultType.isNull())
+      return QualType();
+  } else {
+    // Both are scalar so splat
+    QualType ResultElementTy;
+    LHSType = LHSType.getCanonicalType().getUnqualifiedType();
+    RHSType = RHSType.getCanonicalType().getUnqualifiedType();
+
+    if (Context.hasSameType(LHSType, RHSType))
+      ResultElementTy = LHSType;
+    else
+      ResultElementTy =
+          UsualArithmeticConversions(LHS, RHS, QuestionLoc, ACK_Conditional);
+
+    if (ResultElementTy->isEnumeralType()) {
+      Diag(QuestionLoc, diag::err_conditional_vector_operand_type)
+          << ResultElementTy;
+      return QualType();
+    }
+
+    ResultType = Context.getScalableVectorType(
+        ResultElementTy, CondElementCount.getKnownMinValue());
+
+    LHS = ImpCastExprToType(LHS.get(), ResultType, CK_VectorSplat);
+    RHS = ImpCastExprToType(RHS.get(), ResultType, CK_VectorSplat);
+  }
+
+  assert(!ResultType.isNull() && ResultType->isVLSTBuiltinType() &&
+         "Result should have been a vector type");
+  auto *ResultBuiltinTy = ResultType->castAs<BuiltinType>();
+  QualType ResultElementTy = ResultBuiltinTy->getSveEltType(Context);
+  llvm::ElementCount ResultElementCount =
+      Context.getBuiltinVectorTypeInfo(ResultBuiltinTy).EC;
+
+  if (ResultElementCount != CondElementCount) {
+    Diag(QuestionLoc, diag::err_conditional_vector_size)
+        << CondType << ResultType;
+    return QualType();
+  }
+
+  if (Context.getTypeSize(ResultElementTy) !=
+      Context.getTypeSize(CondElementTy)) {
+    Diag(QuestionLoc, diag::err_conditional_vector_element_size)
+        << CondType << ResultType;
+    return QualType();
+  }
+
+  return ResultType;
+}
+
 /// Check the operands of ?: under C++ semantics.
 ///
 /// See C++ [expr.cond]. Note that LHS is never null, even for the GNU x ?: y
@@ -6232,10 +6327,14 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS,
   bool IsVectorConditional =
       isValidVectorForConditionalCondition(Context, Cond.get()->getType());
 
+  bool IsSizelessVectorConditional =
+      isValidSizelessVectorForConditionalCondition(Context,
+                                                   Cond.get()->getType());
+
   // C++11 [expr.cond]p1
   //   The first expression is contextually converted to bool.
   if (!Cond.get()->isTypeDependent()) {
-    ExprResult CondRes = IsVectorConditional
+    ExprResult CondRes = IsVectorConditional || IsSizelessVectorConditional
                              ? DefaultFunctionArrayLvalueConversion(Cond.get())
                              : CheckCXXBooleanCondition(Cond.get());
     if (CondRes.isInvalid())
@@ -6304,6 +6403,9 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS,
   if (IsVectorConditional)
     return CheckVectorConditionalTypes(Cond, LHS, RHS, QuestionLoc);
 
+  if (IsSizelessVectorConditional)
+    return CheckSizelessVectorConditionalTypes(Cond, LHS, RHS, QuestionLoc);
+
   // C++11 [expr.cond]p3
   //   Otherwise, if the second and third operand have different types, and
   //   either has (cv) class type [...] an attempt is made to convert each of
diff --git a/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp
index a50674d..895212d 100644
--- a/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp
@@ -35,9 +35,9 @@ namespace {
 class InnerPointerChecker
     : public Checker<check::DeadSymbols, check::PostCall> {
 
-  CallDescription AppendFn, AssignFn, AddressofFn, ClearFn, CStrFn, DataFn,
-      DataMemberFn, EraseFn, InsertFn, PopBackFn, PushBackFn, ReplaceFn,
-      ReserveFn, ResizeFn, ShrinkToFitFn, SwapFn;
+  CallDescription AppendFn, AssignFn, AddressofFn, AddressofFn_, ClearFn,
+      CStrFn, DataFn, DataMemberFn, EraseFn, InsertFn, PopBackFn, PushBackFn,
+      ReplaceFn, ReserveFn, ResizeFn, ShrinkToFitFn, SwapFn;
 
 public:
   class InnerPointerBRVisitor : public BugReporterVisitor {
@@ -74,7 +74,7 @@ public:
   InnerPointerChecker()
       : AppendFn({"std", "basic_string", "append"}),
         AssignFn({"std", "basic_string", "assign"}),
-        AddressofFn({"std", "addressof"}),
+        AddressofFn({"std", "addressof"}), AddressofFn_({"std", "__addressof"}),
         ClearFn({"std", "basic_string", "clear"}),
         CStrFn({"std", "basic_string", "c_str"}), DataFn({"std", "data"}, 1),
         DataMemberFn({"std", "basic_string", "data"}),
@@ -179,9 +179,9 @@ void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call,
       if (!ArgRegion)
         continue;
 
-      // std::addressof function accepts a non-const reference as an argument,
+      // std::addressof functions accepts a non-const reference as an argument,
       // but doesn't modify it.
-      if (AddressofFn.matches(Call))
+      if (matchesAny(Call, AddressofFn, AddressofFn_))
         continue;
 
       markPtrSymbolsReleased(Call, State, ArgRegion, C);
diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 0bbef39..ae9a7bc 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -1408,8 +1408,8 @@ void MallocChecker::checkGMalloc0(const CallEvent &Call,
 void MallocChecker::checkGMemdup(const CallEvent &Call,
                                  CheckerContext &C) const {
   ProgramStateRef State = C.getState();
-  State = MallocMemAux(C, Call, Call.getArgExpr(1), UndefinedVal(), State,
-                       AF_Malloc);
+  State =
+      MallocMemAux(C, Call, Call.getArgExpr(1), UnknownVal(), State, AF_Malloc);
   State = ProcessZeroAllocCheck(Call, 1, State);
   C.addTransition(State);
 }
diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 099e70a..b16e1f0 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -672,24 +672,19 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
   if (!IsFread || (OldSS->ErrorState != ErrorFEof)) {
     ProgramStateRef StateNotFailed =
         State->BindExpr(CE, C.getLocationContext(), *NMembVal);
-    if (StateNotFailed) {
-      StateNotFailed = StateNotFailed->set<StreamMap>(
-          StreamSym, StreamState::getOpened(Desc));
-      C.addTransition(StateNotFailed);
-    }
+    StateNotFailed =
+        StateNotFailed->set<StreamMap>(StreamSym, StreamState::getOpened(Desc));
+    C.addTransition(StateNotFailed);
   }
 
   // Add transition for the failed state.
-  Optional<NonLoc> RetVal = makeRetVal(C, CE).castAs<NonLoc>();
-  assert(RetVal && "Value should be NonLoc.");
+  NonLoc RetVal = makeRetVal(C, CE).castAs<NonLoc>();
   ProgramStateRef StateFailed =
-      State->BindExpr(CE, C.getLocationContext(), *RetVal);
-  if (!StateFailed)
-    return;
-  auto Cond = C.getSValBuilder()
-                  .evalBinOpNN(State, BO_LT, *RetVal, *NMembVal,
-                               C.getASTContext().IntTy)
-                  .getAs<DefinedOrUnknownSVal>();
+      State->BindExpr(CE, C.getLocationContext(), RetVal);
+  auto Cond =
+      C.getSValBuilder()
+          .evalBinOpNN(State, BO_LT, RetVal, *NMembVal, C.getASTContext().IntTy)
+          .getAs<DefinedOrUnknownSVal>();
   if (!Cond)
     return;
   StateFailed = StateFailed->assume(*Cond, true);
diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
index 02a8d6a..4416209 100644
--- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -37,7 +37,9 @@ void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
     Out << I.first << " : " << I.second << NL;
 }
 
-void dumpTaint(ProgramStateRef State) { printTaint(State, llvm::errs()); }
+void taint::dumpTaint(ProgramStateRef State) {
+  printTaint(State, llvm::errs());
+}
 
 ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
                                 const LocationContext *LCtx,
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index ec7ab25..e748a35 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -3119,7 +3119,6 @@ void ExprEngine::VisitMSAsmStmt(const MSAsmStmt *A, ExplodedNode *Pred,
 // Visualization.
 //===----------------------------------------------------------------------===//
 
-#ifndef NDEBUG
 namespace llvm {
 
 template<>
@@ -3217,29 +3216,18 @@ struct DOTGraphTraits<ExplodedGraph*> : public DefaultDOTGraphTraits {
 };
 
 } // namespace llvm
-#endif
 
 void ExprEngine::ViewGraph(bool trim) {
-#ifndef NDEBUG
   std::string Filename = DumpGraph(trim);
   llvm::DisplayGraph(Filename, false, llvm::GraphProgram::DOT);
-#else
-  llvm::errs() << "Warning: viewing graph requires assertions" << "\n";
-#endif
 }
 
-
-void ExprEngine::ViewGraph(ArrayRef<const ExplodedNode*> Nodes) {
-#ifndef NDEBUG
+void ExprEngine::ViewGraph(ArrayRef<const ExplodedNode *> Nodes) {
   std::string Filename = DumpGraph(Nodes);
   llvm::DisplayGraph(Filename, false, llvm::GraphProgram::DOT);
-#else
-  llvm::errs() << "Warning: viewing graph requires assertions" << "\n";
-#endif
 }
 
 std::string ExprEngine::DumpGraph(bool trim, StringRef Filename) {
-#ifndef NDEBUG
   if (trim) {
     std::vector<const ExplodedNode *> Src;
 
@@ -3254,35 +3242,26 @@ std::string ExprEngine::DumpGraph(bool trim, StringRef Filename) {
       Src.push_back(N);
     }
     return DumpGraph(Src, Filename);
-  } else {
-    return llvm::WriteGraph(&G, "ExprEngine", /*ShortNames=*/false,
-                            /*Title=*/"Exploded Graph",
-                            /*Filename=*/std::string(Filename));
   }
-#else
-  llvm::errs() << "Warning: dumping graph requires assertions" << "\n";
-  return "";
-#endif
+
+  return llvm::WriteGraph(&G, "ExprEngine", /*ShortNames=*/false,
+                          /*Title=*/"Exploded Graph",
+                          /*Filename=*/std::string(Filename));
 }
 
-std::string ExprEngine::DumpGraph(ArrayRef<const ExplodedNode*> Nodes,
+std::string ExprEngine::DumpGraph(ArrayRef<const ExplodedNode *> Nodes,
                                   StringRef Filename) {
-#ifndef NDEBUG
   std::unique_ptr<ExplodedGraph> TrimmedG(G.trim(Nodes));
 
   if (!TrimmedG.get()) {
     llvm::errs() << "warning: Trimmed ExplodedGraph is empty.\n";
     return "";
-  } else {
-    return llvm::WriteGraph(TrimmedG.get(), "TrimmedExprEngine",
-                            /*ShortNames=*/false,
-                            /*Title=*/"Trimmed Exploded Graph",
-                            /*Filename=*/std::string(Filename));
-  }
-#else
-  llvm::errs() << "Warning: dumping graph requires assertions" << "\n";
-  return "";
-#endif
+  }
+
+  return llvm::WriteGraph(TrimmedG.get(), "TrimmedExprEngine",
+                          /*ShortNames=*/false,
+                          /*Title=*/"Trimmed Exploded Graph",
+                          /*Filename=*/std::string(Filename));
 }
 
 void *ProgramStateTrait<ReplayWithoutInlining>::GDMIndex() {
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index 9d062dd..20b167c 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -1983,15 +1983,9 @@ SVal RegionStoreManager::getBindingForField(RegionBindingsConstRef B,
   if (const Optional<SVal> &V = B.getDirectBinding(R))
     return *V;
 
-  // Is the field declared constant and has an in-class initializer?
+  // If the containing record was initialized, try to get its constant value.
   const FieldDecl *FD = R->getDecl();
   QualType Ty = FD->getType();
-  if (Ty.isConstQualified())
-    if (const Expr *Init = FD->getInClassInitializer())
-      if (Optional<SVal> V = svalBuilder.getConstantVal(Init))
-        return *V;
-
-  // If the containing record was initialized, try to get its constant value.
   const MemRegion* superR = R->getSuperRegion();
   if (const auto *VR = dyn_cast<VarRegion>(superR)) {
     const VarDecl *VD = VR->getDecl();
diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
index e045c9a..12b4642 100644
--- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
@@ -983,8 +983,8 @@ SVal SValBuilder::evalCastSubKind(nonloc::SymbolVal V, QualType CastTy,
 
     // Produce SymbolCast if CastTy and T are different integers.
     // NOTE: In the end the type of SymbolCast shall be equal to CastTy.
-    if (T->isIntegralOrEnumerationType() &&
-        CastTy->isIntegralOrEnumerationType()) {
+    if (T->isIntegralOrUnscopedEnumerationType() &&
+        CastTy->isIntegralOrUnscopedEnumerationType()) {
       AnalyzerOptions &Opts =
           StateMgr.getOwningEngine().getAnalysisManager().getAnalyzerOptions();
       // If appropriate option is disabled, ignore the cast.
diff --git a/clang/test/Analysis/cxx-member-initializer-const-field.cpp b/clang/test/Analysis/cxx-member-initializer-const-field.cpp
new file mode 100644
index 0000000..f0abbdd
--- /dev/null
+++ b/clang/test/Analysis/cxx-member-initializer-const-field.cpp
@@ -0,0 +1,120 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify %s
+
+// This tests false-positive issues related to PR48534.
+//
+// Essentially, having a default member initializer for a constant member does
+// not necessarily imply the member will have the given default value.
+
+struct WithConstructor {
+  int *const ptr = nullptr;
+  WithConstructor(int *x) : ptr(x) {}
+
+  static auto compliant() {
+    WithConstructor c(new int);
+    return *(c.ptr); // no warning
+  }
+
+  static auto compliantWithParam(WithConstructor c) {
+    return *(c.ptr); // no warning
+  }
+
+  static auto issue() {
+    WithConstructor c(nullptr);
+    return *(c.ptr); // expected-warning{{Dereference of null pointer (loaded from field 'ptr')}}
+  }
+};
+
+struct RegularAggregate {
+  int *const ptr = nullptr;
+
+  static int compliant() {
+    RegularAggregate c{new int};
+    return *(c.ptr); // no warning
+  }
+
+  static int issue() {
+    RegularAggregate c;
+    return *(c.ptr); // expected-warning{{Dereference of null pointer (loaded from field 'ptr')}}
+  }
+};
+
+struct WithConstructorAndArithmetic {
+  int const i = 0;
+  WithConstructorAndArithmetic(int x) : i(x + 1) {}
+
+  static int compliant(int y) {
+    WithConstructorAndArithmetic c(0);
+    return y / c.i; // no warning
+  }
+
+  static int issue(int y) {
+    WithConstructorAndArithmetic c(-1);
+    return y / c.i; // expected-warning{{Division by zero}}
+  }
+};
+
+struct WithConstructorDeclarationOnly {
+  int const i = 0;
+  WithConstructorDeclarationOnly(int x); // definition not visible.
+
+  static int compliant1(int y) {
+    WithConstructorDeclarationOnly c(0);
+    return y / c.i; // no warning
+  }
+
+  static int compliant2(int y) {
+    WithConstructorDeclarationOnly c(-1);
+    return y / c.i; // no warning
+  }
+};
+
+// NonAggregateFP is not an aggregate (j is a private non-static field) and has no custom constructor.
+// So we know i and j will always be 0 and 42, respectively.
+// That being said, this is not implemented because it is deemed too rare to be worth the complexity.
+struct NonAggregateFP {
+public:
+  int const i = 0;
+
+private:
+  int const j = 42;
+
+public:
+  static int falsePositive1(NonAggregateFP c) {
+    return 10 / c.i; // FIXME: Currently, no warning.
+  }
+
+  static int falsePositive2(NonAggregateFP c) {
+    return 10 / (c.j - 42); // FIXME: Currently, no warning.
+  }
+};
+
+struct NonAggregate {
+public:
+  int const i = 0;
+
+private:
+  int const j = 42;
+
+  NonAggregate(NonAggregate const &); // not provided, could set i and j to arbitrary values.
+
+public:
+  static int compliant1(NonAggregate c) {
+    return 10 / c.i; // no warning
+  }
+
+  static int compliant2(NonAggregate c) {
+    return 10 / (c.j - 42); // no warning
+  }
+};
+
+struct WithStaticMember {
+  static int const i = 0;
+
+  static int issue1(WithStaticMember c) {
+    return 10 / c.i; // expected-warning{{division by zero is undefined}} expected-warning{{Division by zero}}
+  }
+
+  static int issue2() {
+    return 10 / WithStaticMember::i; // expected-warning{{division by zero is undefined}} expected-warning{{Division by zero}}
+  }
+};
diff --git a/clang/test/Analysis/debug-checkers.cpp b/clang/test/Analysis/debug-checkers.cpp
new file mode 100644
index 0000000..234edd7
--- /dev/null
+++ b/clang/test/Analysis/debug-checkers.cpp
@@ -0,0 +1,51 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.DumpDominators %s > %t 2>&1
+// RUN: FileCheck --input-file=%t %s -check-prefix=DOM-CHECK
+// DOM-CHECK: Immediate dominance tree (Node#,IDom#)
+
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.DumpPostDominators %s > %t 2>&1
+// RUN: FileCheck --input-file=%t %s -check-prefix=POSTDOM-CHECK
+// POSTDOM-CHECK: Immediate post dominance tree (Node#,IDom#)
+
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.DumpControlDependencies %s > %t 2>&1
+// RUN: FileCheck --input-file=%t %s -check-prefix=CTRLDEPS-CHECK
+// CTRLDEPS-CHECK: Control dependencies (Node#,Dependency#)
+
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.DumpLiveVars %s > %t 2>&1
+// RUN: FileCheck --input-file=%t %s -check-prefix=LIVE-VARS-CHECK
+// LIVE-VARS-CHECK: live variables at block exit
+
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.DumpLiveExprs %s > %t 2>&1
+// RUN: FileCheck --input-file=%t %s -check-prefix=LIVE-EXPRS-CHECK
+// LIVE-EXPRS-CHECK: live expressions at block exit
+
+// Skip testing CFGViewer.
+
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.DumpCFG %s > %t 2>&1
+// RUN: FileCheck --input-file=%t %s -check-prefix=CFG-CHECK
+// CFG-CHECK: ENTRY
+
+// Skip testing CallGraphViewer.
+
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.DumpCallGraph %s > %t 2>&1
+// RUN: FileCheck --input-file=%t %s -check-prefix=CALL-GRAPH-CHECK
+// CALL-GRAPH-CHECK: --- Call graph Dump ---
+
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ConfigDumper %s > %t 2>&1
+// RUN: FileCheck --input-file=%t %s -check-prefix=CONFIG-CHECK
+// CONFIG-CHECK: [config]
+
+// Skip testing ExplodedGraphViewer.
+
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ReportStmts %s > %t 2>&1
+// RUN: FileCheck --input-file=%t %s -check-prefix=REPORT-STMTS-CHECK
+// REPORT-STMTS-CHECK: warning: Statement
+
+void foo(int *p) {
+  *p = 3;
+}
+
+int bar() {
+  int x;
+  foo(&x);
+  return x;
+}
diff --git a/clang/test/Analysis/dump_egraph.c b/clang/test/Analysis/dump_egraph.c
index a2a916d..06c3ccd 100644
--- a/clang/test/Analysis/dump_egraph.c
+++ b/clang/test/Analysis/dump_egraph.c
@@ -7,8 +7,6 @@
 // RUN:  -trim-egraph %s
 // RUN: cat %t.dot | FileCheck %s
 
-// REQUIRES: asserts
-
 int getJ(void);
 
 int foo(void) {
@@ -29,7 +27,7 @@ int foo(void) {
 // CHECK-SAME: \{ \"kind\": \"Statement\", \"stmt_kind\": \"IntegerLiteral\",
 // CHECK-SAME:    \"stmt_id\": {{[0-9]*}}, \"pointer\": \"0x{{[0-9a-f]*}}\",
 // CHECK-SAME:    \"pretty\": \"0\", \"location\": \{
-// CHECK-SAME:        \"line\": 15, \"column\": 12, \"file\":
+// CHECK-SAME:        \"line\": 13, \"column\": 12, \"file\":
 // CHECK-SAME:    \}, \"stmt_point_kind\": \"PreStmtPurgeDeadSymbols\",
 // CHECK-SAME:    \"tag\": \"ExprEngine : Clean Node\", \"node_id\": 3,
 // CHECK-SAME:    \"is_sink\": 0, \"has_report\": 0
@@ -37,13 +35,13 @@ int foo(void) {
 // CHECK-SAME: \{ \"kind\": \"Statement\", \"stmt_kind\": \"IntegerLiteral\",
 // CHECK-SAME:    \"stmt_id\": {{[0-9]*}}, \"pointer\": \"0x{{[0-9a-f]*}}\",
 // CHECK-SAME:    \"pretty\": \"0\", \"location\": \{
-// CHECK-SAME:        \"line\": 15, \"column\": 12, \"file\":
+// CHECK-SAME:        \"line\": 13, \"column\": 12, \"file\":
 // CHECK-SAME:    \}, \"stmt_point_kind\": \"PostStmt\", \"tag\": null,
 // CHECK-SAME:    \"node_id\": 4, \"is_sink\": 0, \"has_report\": 0
 // CHECK-SAME: \}
 // CHECK-SAME: ]
 
-// CHECK: \"pretty\": \"*x\", \"location\": \{ \"line\": 18, \"column\": 10, \"file\": \"{{(.+)}}dump_egraph.c\" \}
+// CHECK: \"pretty\": \"*x\", \"location\": \{ \"line\": 16, \"column\": 10, \"file\": \"{{(.+)}}dump_egraph.c\" \}
 
 // CHECK: \"pretty\": \"'\\\\x13'\"
 
diff --git a/clang/test/Analysis/dump_egraph.cpp b/clang/test/Analysis/dump_egraph.cpp
index 9b87d1a..f695362 100644
--- a/clang/test/Analysis/dump_egraph.cpp
+++ b/clang/test/Analysis/dump_egraph.cpp
@@ -1,6 +1,5 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core -analyzer-dump-egraph=%t.dot %s
 // RUN: cat %t.dot | FileCheck %s
-// REQUIRES: asserts
 
 struct S {
   ~S();
@@ -20,7 +19,7 @@ void foo() {
 
 // CHECK: \"location_context\": \"#0 Call\", \"calling\": \"foo\", \"location\": null, \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"stmt_id\": {{[0-9]+}}, \"kind\": \"construct into local variable\", \"argument_index\": null, \"pretty\": \"T t;\", \"value\": \"&t\"
 
-// CHECK: \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"location\": \{ \"line\": 16, \"column\": 5, \"file\": \"{{.*}}dump_egraph.cpp\" \}, \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"init_id\": {{[0-9]+}}, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t.s\"
+// CHECK: \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"location\": \{ \"line\": 15, \"column\": 5, \"file\": \"{{.*}}dump_egraph.cpp\" \}, \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"init_id\": {{[0-9]+}}, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t.s\"
 
 // CHECK: \"cluster\": \"t\", \"pointer\": \"{{0x[0-9a-f]+}}\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"kind\": \"Default\", \"offset\": 0, \"value\": \"conj_$2\{int, LC5, no stmt, #1\}\"
 
diff --git a/clang/test/Analysis/exploded-graph-rewriter/dynamic_types.cpp b/clang/test/Analysis/exploded-graph-rewriter/dynamic_types.cpp
index 25583e6..b9e7671 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/dynamic_types.cpp
+++ b/clang/test/Analysis/exploded-graph-rewriter/dynamic_types.cpp
@@ -3,7 +3,6 @@
 // RUN:                     -analyzer-checker=core \
 // RUN:                     -analyzer-dump-egraph=%t.dot %s
 // RUN: %exploded_graph_rewriter %t.dot | FileCheck %s
-// REQUIRES: asserts
 
 struct S {};
 
diff --git a/clang/test/Analysis/exploded-graph-rewriter/escapes.c b/clang/test/Analysis/exploded-graph-rewriter/escapes.c
index 5fdf9c3..830e082 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/escapes.c
+++ b/clang/test/Analysis/exploded-graph-rewriter/escapes.c
@@ -3,7 +3,6 @@
 // RUN:                     -analyzer-checker=core \
 // RUN:                     -analyzer-dump-egraph=%t.dot %s
 // RUN: %exploded_graph_rewriter %t.dot | FileCheck %s
-// REQUIRES: asserts
 
 void escapes(void) {
   // CHECK: <td align="left"><b>Store: </b> <font color="gray">(0x{{[0-9a-f]*}})</font></td>
diff --git a/clang/test/Analysis/exploded-graph-rewriter/initializers_under_construction.cpp b/clang/test/Analysis/exploded-graph-rewriter/initializers_under_construction.cpp
index aa28a1d..82b7f32 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/initializers_under_construction.cpp
+++ b/clang/test/Analysis/exploded-graph-rewriter/initializers_under_construction.cpp
@@ -3,7 +3,6 @@
 // RUN:                     -analyzer-checker=core \
 // RUN:                     -analyzer-dump-egraph=%t.dot %s
 // RUN: %exploded_graph_rewriter %t.dot | FileCheck %s
-// REQUIRES: asserts
 
 struct A {
   A() {}
diff --git a/clang/test/Analysis/exploded-graph-rewriter/l_name_starts_with_l.cpp b/clang/test/Analysis/exploded-graph-rewriter/l_name_starts_with_l.cpp
index a5d2443..f18c4c5 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/l_name_starts_with_l.cpp
+++ b/clang/test/Analysis/exploded-graph-rewriter/l_name_starts_with_l.cpp
@@ -4,7 +4,6 @@
 // RUN:                     -analyzer-checker=core \
 // RUN:                     -analyzer-dump-egraph=%t.dot %s
 // RUN: %exploded_graph_rewriter %t.dot | FileCheck %s
-// REQUIRES: asserts
 
 void test1() {
   // Here __FILE__ macros produces a string with `\` delimiters on Windows
diff --git a/clang/test/Analysis/exploded-graph-rewriter/macros.c b/clang/test/Analysis/exploded-graph-rewriter/macros.c
index 5ae5347..f47342f 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/macros.c
+++ b/clang/test/Analysis/exploded-graph-rewriter/macros.c
@@ -11,7 +11,6 @@ void *foo(void) {
 // RUN:                     -analyzer-checker=core \
 // RUN:                     -analyzer-dump-egraph=%t.dot %s
 // RUN: %exploded_graph_rewriter %t.dot | FileCheck %s
-// REQUIRES: asserts
 
 // CHECK: macros.c:<b>3</b>:<b>10</b>
 // CHECK-SAME: <font color="royalblue1">
diff --git a/clang/test/Analysis/exploded-graph-rewriter/objects_under_construction.cpp b/clang/test/Analysis/exploded-graph-rewriter/objects_under_construction.cpp
index 69cec58..0511280 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/objects_under_construction.cpp
+++ b/clang/test/Analysis/exploded-graph-rewriter/objects_under_construction.cpp
@@ -4,7 +4,6 @@
 // RUN:                     -analyzer-checker=core \
 // RUN:                     -analyzer-dump-egraph=%t.dot %s
 // RUN: %exploded_graph_rewriter %t.dot | FileCheck %s
-// REQUIRES: asserts
 
 struct S {
   S() {}
diff --git a/clang/test/Analysis/exploded-graph-rewriter/win_path_forbidden_chars.cpp b/clang/test/Analysis/exploded-graph-rewriter/win_path_forbidden_chars.cpp
index 4eac964..4c217e6 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/win_path_forbidden_chars.cpp
+++ b/clang/test/Analysis/exploded-graph-rewriter/win_path_forbidden_chars.cpp
@@ -3,7 +3,6 @@
 // RUN:                     -analyzer-checker=core \
 // RUN:                     -analyzer-dump-egraph=%t.dot %s
 // RUN: %exploded_graph_rewriter --verbose %t.dot 2>&1 | FileCheck %s
-// REQUIRES: asserts
 // UNSUPPORTED: !windows
 
 // Angle brackets shall not be presented in the field `file`,
diff --git a/clang/test/Analysis/gmalloc.c b/clang/test/Analysis/gmalloc.c
index 51f9728..240138e 100644
--- a/clang/test/Analysis/gmalloc.c
+++ b/clang/test/Analysis/gmalloc.c
@@ -21,6 +21,7 @@ gpointer g_try_malloc0_n(gsize n_blocks, gsize n_block_bytes);
 gpointer g_try_realloc_n(gpointer mem, gsize n_blocks, gsize n_block_bytes);
 void g_free(gpointer mem);
 gpointer g_memdup(gconstpointer mem, guint byte_size);
+gpointer g_strconcat(gconstpointer string1, ...);
 
 static const gsize n_bytes = 1024;
 
@@ -167,3 +168,16 @@ void f7(void) {
   g_free(g6);
   g_free(g7);
 }
+
+void f8(void) {
+  typedef struct {
+    gpointer str;
+  } test_struct;
+
+  test_struct *s1 = (test_struct *)g_malloc0(sizeof(test_struct));
+  test_struct *s2 = (test_struct *)g_memdup(s1, sizeof(test_struct));
+  gpointer str = g_strconcat("text", s1->str, s2->str, NULL); // no-warning
+  g_free(str);
+  g_free(s2);
+  g_free(s1);
+}
diff --git a/clang/test/Analysis/inner-pointer.cpp b/clang/test/Analysis/inner-pointer.cpp
index 5db17a9..4cb1944 100644
--- a/clang/test/Analysis/inner-pointer.cpp
+++ b/clang/test/Analysis/inner-pointer.cpp
@@ -20,6 +20,9 @@ void default_arg(int a = 42, string &b = my_string);
 template <class T>
 T *addressof(T &arg);
 
+template <class T>
+T *__addressof(T &arg);
+
 char *data(std::string &c);
 
 } // end namespace std
@@ -383,6 +386,14 @@ void func_addressof() {
   consume(c); // no-warning
 }
 
+void func_AddressofFn_() {
+  const char *c;
+  std::string s;
+  c = s.c_str();
+  (void)std::__addressof(s);
+  consume(c); // no-warning
+}
+
 void func_std_data() {
   const char *c;
   std::string s;
diff --git a/clang/test/Analysis/z3-refute-enum-crash.cpp b/clang/test/Analysis/z3-refute-enum-crash.cpp
new file mode 100644
index 0000000..355c10f
--- /dev/null
+++ b/clang/test/Analysis/z3-refute-enum-crash.cpp
@@ -0,0 +1,77 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection \
+// RUN:   -analyzer-config crosscheck-with-z3=true -verify %s
+//
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection \
+// RUN:   -verify %s
+//
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection \
+// RUN:   -analyzer-config support-symbolic-integer-casts=true -verify=symbolic %s
+//
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection \
+// RUN:   -analyzer-config support-symbolic-integer-casts=false -verify %s
+//
+// REQUIRES: asserts, z3
+//
+// Requires z3 only for refutation. Works with both constraint managers.
+
+void clang_analyzer_dump(int);
+
+using sugar_t = unsigned char;
+
+// Enum types
+enum class ScopedSugared : sugar_t {};
+enum class ScopedPrimitive : unsigned char {};
+enum UnscopedSugared : sugar_t {};
+enum UnscopedPrimitive : unsigned char {};
+
+template <typename T>
+T conjure();
+
+void test_enum_types() {
+  // Let's construct a 'binop(sym, int)', where the binop will trigger an
+  // integral promotion to int. Note that we need to first explicit cast
+  // the scoped-enum to an integral, to make it compile. We could have choosen
+  // any other binary operator.
+  int sym1 = static_cast<unsigned char>(conjure<ScopedSugared>()) & 0x0F;
+  int sym2 = static_cast<unsigned char>(conjure<ScopedPrimitive>()) & 0x0F;
+  int sym3 = static_cast<unsigned char>(conjure<UnscopedSugared>()) & 0x0F;
+  int sym4 = static_cast<unsigned char>(conjure<UnscopedPrimitive>()) & 0x0F;
+
+  // We need to introduce a constraint referring to the binop, to get it
+  // serialized during the z3-refutation.
+  if (sym1 && sym2 && sym3 && sym4) {
+    // no-crash on these dumps
+    //
+    // The 'clang_analyzer_dump' will construct a bugreport, which in turn will
+    // trigger a z3-refutation. Refutation will walk the bugpath, collect and
+    // serialize the path-constraints into z3 expressions. The binop will
+    // operate over 'int' type, but the symbolic operand might have a different
+    // type - surprisingly.
+    // Historically, the static analyzer did not emit symbolic casts in a lot
+    // of cases, not even when the c++ standard mandated it, like for casting
+    // a scoped enum to its underlying type. Hence, during the z3 constraint
+    // serialization, it made up these 'missing' integral casts - for the
+    // implicit cases at least.
+    // However, it would still not emit the cast for missing explicit casts,
+    // hence 8-bit wide symbol would be bitwise 'and'-ed with a 32-bit wide
+    // int, violating an assertion stating that the operands should have the
+    // same bitwidths.
+
+    clang_analyzer_dump(sym1);
+    // expected-warning-re@-1 {{((unsigned char) (conj_${{[0-9]+}}{enum ScopedSugared, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}})) & 15}}
+    // symbolic-warning-re@-2           {{((int) (conj_${{[0-9]+}}{enum ScopedSugared, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}})) & 15}}
+
+    clang_analyzer_dump(sym2);
+    // expected-warning-re@-1 {{((unsigned char) (conj_${{[0-9]+}}{enum ScopedPrimitive, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}})) & 15}}
+    // symbolic-warning-re@-2           {{((int) (conj_${{[0-9]+}}{enum ScopedPrimitive, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}})) & 15}}
+
+    clang_analyzer_dump(sym3);
+    // expected-warning-re@-1        {{(conj_${{[0-9]+}}{enum UnscopedSugared, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}}) & 15}}
+    // symbolic-warning-re@-2 {{((int) (conj_${{[0-9]+}}{enum UnscopedSugared, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}})) & 15}}
+
+    clang_analyzer_dump(sym4);
+    // expected-warning-re@-1        {{(conj_${{[0-9]+}}{enum UnscopedPrimitive, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}}) & 15}}
+    // symbolic-warning-re@-2 {{((int) (conj_${{[0-9]+}}{enum UnscopedPrimitive, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}})) & 15}}
+  }
+}
+
diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c
index f5fbd0c..2988a318 100644
--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -58,6 +58,28 @@ void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) {
   unsigned long long r5 = __builtin_reduce_min(cvi1);
 }
 
+void test_builtin_reduce_add(si8 vi1, u4 vu1) {
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VI1]])
+  short r2 = __builtin_reduce_add(vi1);
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VU1]])
+  unsigned r3 = __builtin_reduce_add(vu1);
+
+  // CHECK:      [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16
+  // CHECK-NEXT: [[RDX1:%.+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[CVI1]])
+  // CHECK-NEXT: sext i16 [[RDX1]] to i32
+  const si8 cvi1 = vi1;
+  int r4 = __builtin_reduce_add(cvi1);
+
+  // CHECK:      [[CVU1:%.+]] = load <4 x i32>, <4 x i32>* %cvu1, align 16
+  // CHECK-NEXT: [[RDX2:%.+]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[CVU1]])
+  // CHECK-NEXT: zext i32 [[RDX2]] to i64
+  const u4 cvu1 = vu1;
+  unsigned long long r5 = __builtin_reduce_add(cvu1);
+}
+
 void test_builtin_reduce_xor(si8 vi1, u4 vu1) {
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
diff --git a/clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp b/clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp
new file mode 100644
index 0000000..e5f1311
--- /dev/null
+++ b/clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp
@@ -0,0 +1,224 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \
+// RUN: -fallow-half-arguments-and-returns -disable-O0-optnone \
+// RUN:  -emit-llvm -o - %s | opt -S -sroa | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+// CHECK-LABEL: @_Z9cond_boolu10__SVBool_tu10__SVBool_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 16 x i1> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 16 x i1> [[CMP]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 16 x i1> [[VECTOR_COND]], <vscale x 16 x i1> [[A]], <vscale x 16 x i1> [[B]]
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[VECTOR_SELECT]]
+//
+svbool_t cond_bool(svbool_t a, svbool_t b) {
+    return a < b ? a : b;
+}
+
+// CHECK-LABEL: @_Z7cond_i8u10__SVInt8_tu10__SVInt8_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 16 x i1> [[CMP]] to <vscale x 16 x i8>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 16 x i8> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 16 x i1> [[VECTOR_COND]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[VECTOR_SELECT]]
+//
+svint8_t cond_i8(svint8_t a, svint8_t b) {
+    return a < b ? a : b;
+}
+
+// CHECK-LABEL: @_Z7cond_u8u11__SVUint8_tu11__SVUint8_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 16 x i1> [[CMP]] to <vscale x 16 x i8>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 16 x i8> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 16 x i1> [[VECTOR_COND]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[VECTOR_SELECT]]
+//
+svuint8_t cond_u8(svuint8_t a, svuint8_t b) {
+    return a < b ? a : b;
+}
+
+// CHECK-LABEL: @_Z8cond_i16u11__SVInt16_tu11__SVInt16_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 8 x i1> [[CMP]] to <vscale x 8 x i16>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 8 x i16> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 8 x i1> [[VECTOR_COND]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[VECTOR_SELECT]]
+//
+svint16_t cond_i16(svint16_t a, svint16_t b) {
+    return a < b ? a : b;
+}
+
+// CHECK-LABEL: @_Z8cond_u16u12__SVUint16_tu12__SVUint16_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 8 x i1> [[CMP]] to <vscale x 8 x i16>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 8 x i16> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 8 x i1> [[VECTOR_COND]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[VECTOR_SELECT]]
+//
+svuint16_t cond_u16(svuint16_t a, svuint16_t b) {
+    return a < b ? a : b;
+}
+
+// CHECK-LABEL: @_Z8cond_i32u11__SVInt32_tu11__SVInt32_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 4 x i1> [[CMP]] to <vscale x 4 x i32>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 4 x i32> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 4 x i1> [[VECTOR_COND]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[VECTOR_SELECT]]
+//
+svint32_t cond_i32(svint32_t a, svint32_t b) {
+    return a < b ? a : b;
+}
+
+// CHECK-LABEL: @_Z8cond_u32u12__SVUint32_tu12__SVUint32_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 4 x i1> [[CMP]] to <vscale x 4 x i32>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 4 x i32> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 4 x i1> [[VECTOR_COND]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[VECTOR_SELECT]]
+//
+svuint32_t cond_u32(svuint32_t a, svuint32_t b) {
+    return a < b ? a : b;
+}
+
+// CHECK-LABEL: @_Z8cond_i64u11__SVInt64_tu11__SVInt64_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 2 x i64> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 2 x i1> [[CMP]] to <vscale x 2 x i64>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 2 x i64> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 2 x i1> [[VECTOR_COND]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[VECTOR_SELECT]]
+//
+svint64_t cond_i64(svint64_t a, svint64_t b) {
+    return a < b ? a : b;
+}
+
+// CHECK-LABEL: @_Z8cond_u64u12__SVUint64_tu12__SVUint64_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 2 x i64> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 2 x i1> [[CMP]] to <vscale x 2 x i64>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 2 x i64> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 2 x i1> [[VECTOR_COND]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[VECTOR_SELECT]]
+//
+svuint64_t cond_u64(svuint64_t a, svuint64_t b) {
+    return a < b ? a : b;
+}
+
+// CHECK-LABEL: @_Z8cond_f16u13__SVFloat16_tu13__SVFloat16_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <vscale x 8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 8 x i1> [[CMP]] to <vscale x 8 x i16>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 8 x i16> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 8 x i1> [[VECTOR_COND]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]
+// CHECK-NEXT:    ret <vscale x 8 x half> [[VECTOR_SELECT]]
+//
+svfloat16_t cond_f16(svfloat16_t a, svfloat16_t b) {
+    return a < b ? a : b;
+}
+
+// CHECK-LABEL: @_Z8cond_f32u13__SVFloat32_tu13__SVFloat32_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <vscale x 4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 4 x i1> [[CMP]] to <vscale x 4 x i32>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 4 x i32> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 4 x i1> [[VECTOR_COND]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]
+// CHECK-NEXT:    ret <vscale x 4 x float> [[VECTOR_SELECT]]
+//
+svfloat32_t cond_f32(svfloat32_t a, svfloat32_t b) {
+    return a < b ? a : b;
+}
+
+// CHECK-LABEL: @_Z8cond_f64u13__SVFloat64_tu13__SVFloat64_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <vscale x 2 x double> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 2 x i1> [[CMP]] to <vscale x 2 x i64>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 2 x i64> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 2 x i1> [[VECTOR_COND]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]
+// CHECK-NEXT:    ret <vscale x 2 x double> [[VECTOR_SELECT]]
+//
+svfloat64_t cond_f64(svfloat64_t a, svfloat64_t b) {
+    return a < b ? a : b;
+}
+
+// CHECK-LABEL: @_Z14cond_i32_splatu11__SVInt32_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 4 x i32> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 4 x i1> [[CMP]] to <vscale x 4 x i32>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 4 x i32> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 4 x i1> [[VECTOR_COND]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[VECTOR_SELECT]]
+//
+svint32_t cond_i32_splat(svint32_t a) {
+    return a < 0 ? a : 0;
+}
+
+// CHECK-LABEL: @_Z14cond_u32_splatu12__SVUint32_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 4 x i32> [[A:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 4 x i1> [[CMP]] to <vscale x 4 x i32>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 4 x i32> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 4 x i1> [[VECTOR_COND]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[VECTOR_SELECT]]
+//
+svuint32_t cond_u32_splat(svuint32_t a) {
+    return a < 1u ? a : 1u;
+}
+
+// CHECK-LABEL: @_Z14cond_i64_splatu11__SVInt64_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 2 x i64> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 2 x i1> [[CMP]] to <vscale x 2 x i64>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 2 x i64> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 2 x i1> [[VECTOR_COND]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> zeroinitializer
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[VECTOR_SELECT]]
+//
+svint64_t cond_i64_splat(svint64_t a) {
+    return a < 0l ? a : 0l;
+}
+
+// CHECK-LABEL: @_Z14cond_u64_splatu12__SVUint64_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 2 x i64> [[A:%.*]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 2 x i1> [[CMP]] to <vscale x 2 x i64>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 2 x i64> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 2 x i1> [[VECTOR_COND]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[VECTOR_SELECT]]
+//
+svuint64_t cond_u64_splat(svuint64_t a) {
+    return a < 1ul ? a : 1ul;
+}
+
+// CHECK-LABEL: @_Z14cond_f32_splatu13__SVFloat32_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <vscale x 4 x float> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 4 x i1> [[CMP]] to <vscale x 4 x i32>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 4 x i32> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 4 x i1> [[VECTOR_COND]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT:    ret <vscale x 4 x float> [[VECTOR_SELECT]]
+//
+svfloat32_t cond_f32_splat(svfloat32_t a) {
+    return a < 0.f ? a : 0.f;
+}
+
+// CHECK-LABEL: @_Z14cond_f64_splatu13__SVFloat64_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <vscale x 2 x double> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 2 x i1> [[CMP]] to <vscale x 2 x i64>
+// CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 2 x i64> [[CONV]], zeroinitializer
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 2 x i1> [[VECTOR_COND]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT:    ret <vscale x 2 x double> [[VECTOR_SELECT]]
+//
+svfloat64_t cond_f64_splat(svfloat64_t a) {
+    return a < 0. ? a : 0.;
+}
+
diff --git a/clang/test/CodeGenCXX/clang-abi-compat.cpp b/clang/test/CodeGenCXX/clang-abi-compat.cpp
index d18b1bf..9a8df91 100644
--- a/clang/test/CodeGenCXX/clang-abi-compat.cpp
+++ b/clang/test/CodeGenCXX/clang-abi-compat.cpp
@@ -1,25 +1,27 @@
 // RUN: %clang_cc1 -no-opaque-pointers -std=c++98 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.0 %s -emit-llvm -o - -Wno-c++11-extensions \
-// RUN:     | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s
+// RUN:     | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12,PRE15 %s
 // RUN: %clang_cc1 -no-opaque-pointers -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.0 %s -emit-llvm -o - \
-// RUN:     | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s
+// RUN:     | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12,PRE15 %s
 // RUN: %clang_cc1 -no-opaque-pointers -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.8 %s -emit-llvm -o - \
-// RUN:     | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s
+// RUN:     | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12,PRE15 %s
 // RUN: %clang_cc1 -no-opaque-pointers -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.9 %s -emit-llvm -o - \
-// RUN:     | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12,PRE15 %s
 // RUN: %clang_cc1 -no-opaque-pointers -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=4.0 %s -emit-llvm -o - \
-// RUN:     | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12,PRE15 %s
 // RUN: %clang_cc1 -no-opaque-pointers -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=5 %s -emit-llvm -o - \
-// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17,PRE15 %s
 // RUN: %clang_cc1 -no-opaque-pointers -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=11 %s -emit-llvm -o - \
-// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17,PRE15 %s
 // RUN: %clang_cc1 -no-opaque-pointers -std=c++20 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=11 %s -emit-llvm -o - \
-// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17,PRE12-CXX20,PRE13-CXX20 %s
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17,PRE12-CXX20,PRE13-CXX20,PRE15 %s
 // RUN: %clang_cc1 -no-opaque-pointers -std=c++20 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=12 %s -emit-llvm -o - \
-// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,V12,V12-CXX17,V12-CXX20,PRE13-CXX20 %s
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,V12,V12-CXX17,V12-CXX20,PRE13-CXX20,PRE15 %s
+// RUN: %clang_cc1 -no-opaque-pointers -std=c++20 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=14 %s -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,V12,V12-CXX17,V12-CXX20,V13-CXX20,PRE15 %s
 // RUN: %clang_cc1 -no-opaque-pointers -std=c++98 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=latest %s -emit-llvm -o - -Wno-c++11-extensions \
-// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,V12 %s
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,V12,V15 %s
 // RUN: %clang_cc1 -no-opaque-pointers -std=c++20 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=latest %s -emit-llvm -o - \
-// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,V12,V12-CXX17,V12-CXX20,V13-CXX20 %s
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,V12,V12-CXX17,V12-CXX20,V13-CXX20,V15 %s
 
 typedef __attribute__((vector_size(8))) long long v1xi64;
 void clang39(v1xi64) {}
@@ -147,3 +149,14 @@ int observe_lambdas(T, U, V, W) { return 0; }
 inline auto inline_var_lambda = observe_lambdas([]{}, []{}, (int*)0, (int*)0);
 int use_inline_var_lambda() { return inline_var_lambda; }
 #endif
+
+struct X {
+  struct Y {
+    using a = int;
+    using b = int;
+  };
+};
+template <typename T> void test10(typename T::Y::a, typename T::Y::b, float*, float*) {}
+// PRE15: @_Z6test10I1XEvNT_1Y1aENS1_1Y1bEPfS4_
+// V15:   @_Z6test10I1XEvNT_1Y1aENS2_1bEPfS5_
+template void test10<X>(int, int, float*, float*);
diff --git a/clang/test/CodeGenCXX/mangle.cpp b/clang/test/CodeGenCXX/mangle.cpp
index f6b5297..eb87118 100644
--- a/clang/test/CodeGenCXX/mangle.cpp
+++ b/clang/test/CodeGenCXX/mangle.cpp
@@ -1155,3 +1155,15 @@ namespace test60 {
   // CHECK-LABEL: @_ZN6test601fIiEEvDTplL_ZNS_1aEEcvT__EE
   template void f<int>(int);
 }
+
+namespace test61 {
+  struct X {
+    struct Y {
+      using a = int;
+      using b = int;
+    };
+  };
+  template <typename T> void f(typename T::Y::a, typename T::Y::b) {}
+  // CHECK-LABEL: @_ZN6test611fINS_1XEEEvNT_1Y1aENS3_1bE
+  template void f<X>(int, int);
+}
diff --git a/clang/test/Driver/amdgcn-toolchain-pic.cl b/clang/test/Driver/amdgcn-toolchain-pic.cl
index fc2978a..47adb881 100644
--- a/clang/test/Driver/amdgcn-toolchain-pic.cl
+++ b/clang/test/Driver/amdgcn-toolchain-pic.cl
@@ -1,7 +1,7 @@
-// RUN: %clang -no-canonical-prefixes -### -target amdgcn-- -mcpu=gfx803 %s 2>&1 | FileCheck %s
-// RUN: %clang -no-canonical-prefixes -### -target amdgcn-amd- -mcpu=gfx803 %s 2>&1 | FileCheck %s
-// RUN: %clang -no-canonical-prefixes -### -target amdgcn-amd-amdhsa -mcpu=gfx803 %s 2>&1 | FileCheck %s
-// RUN: %clang -no-canonical-prefixes -### -target amdgcn-amd-amdpal -mcpu=gfx803 %s 2>&1 | FileCheck %s
-// RUN: %clang -no-canonical-prefixes -### -target amdgcn-amd-mesa3d -mcpu=gfx803 %s 2>&1 | FileCheck %s
+// RUN: %clang -### --target=amdgcn-- -mcpu=gfx803 %s 2>&1 | FileCheck %s
+// RUN: %clang -### --target=amdgcn-amd- -mcpu=gfx803 %s 2>&1 | FileCheck %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx803 %s 2>&1 | FileCheck %s
+// RUN: %clang -### --target=amdgcn-amd-amdpal -mcpu=gfx803 %s 2>&1 | FileCheck %s
+// RUN: %clang -### --target=amdgcn-amd-mesa3d -mcpu=gfx803 %s 2>&1 | FileCheck %s
 
-// CHECK: clang{{.*}} "-mrelocation-model" "pic" "-pic-level" "1"
+// CHECK: "-cc1"{{.*}} "-mrelocation-model" "pic" "-pic-level" "1"
diff --git a/clang/test/Driver/amdgpu-openmp-system-arch.c b/clang/test/Driver/amdgpu-openmp-system-arch.c
index ef85925b..a0e8754 100644
--- a/clang/test/Driver/amdgpu-openmp-system-arch.c
+++ b/clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -12,13 +12,13 @@
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
 // RUN:   | FileCheck %s
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]"
+// CHECK: "-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]"
 // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o"
 
 // case when amdgpu_arch returns multiple gpus but of same arch
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx908_gfx908 %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-MULTIPLE
-// CHECK-MULTIPLE: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx908]]"
+// CHECK-MULTIPLE: "-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx908]]"
 // CHECK-MULTIPLE: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
 // CHECK-MULTIPLE: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o"
diff --git a/clang/test/Driver/amdgpu-toolchain-opencl.cl b/clang/test/Driver/amdgpu-toolchain-opencl.cl
index 152eda1..78c477b 100644
--- a/clang/test/Driver/amdgpu-toolchain-opencl.cl
+++ b/clang/test/Driver/amdgpu-toolchain-opencl.cl
@@ -1,27 +1,27 @@
-// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -O0 %s 2>&1 | FileCheck -check-prefix=CHECK_O0 %s
-// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -O1 %s 2>&1 | FileCheck -check-prefix=CHECK_O1 %s
-// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -O2 %s 2>&1 | FileCheck -check-prefix=CHECK_O2 %s
-// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -O3 %s 2>&1 | FileCheck -check-prefix=CHECK_O3 %s
-// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -O4 %s 2>&1 | FileCheck -check-prefix=CHECK_O4 %s
-// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -O5 %s 2>&1 | FileCheck -check-prefix=CHECK_O5 %s
-// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -Og %s 2>&1 | FileCheck -check-prefix=CHECK_Og %s
-// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -Ofast %s 2>&1 | FileCheck -check-prefix=CHECK_Ofast %s
-// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=CHECK_O_DEFAULT %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -O0 %s 2>&1 | FileCheck -check-prefix=CHECK_O0 %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -O1 %s 2>&1 | FileCheck -check-prefix=CHECK_O1 %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -O2 %s 2>&1 | FileCheck -check-prefix=CHECK_O2 %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -O3 %s 2>&1 | FileCheck -check-prefix=CHECK_O3 %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -O4 %s 2>&1 | FileCheck -check-prefix=CHECK_O4 %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -O5 %s 2>&1 | FileCheck -check-prefix=CHECK_O5 %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -Og %s 2>&1 | FileCheck -check-prefix=CHECK_Og %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -Ofast %s 2>&1 | FileCheck -check-prefix=CHECK_Ofast %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=CHECK_O_DEFAULT %s
 
 // Check default include file is not included for preprocessor output.
 
-// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=CHK-INC %s
-// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -save-temps %s 2>&1 | FileCheck -check-prefix=CHK-INC %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=CHK-INC %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -save-temps %s 2>&1 | FileCheck -check-prefix=CHK-INC %s
 
-// CHECK_O0: clang{{.*}} "-O0"
-// CHECK_O1: clang{{.*}} "-O1"
-// CHECK_O2: clang{{.*}} "-O2"
-// CHECK_O3: clang{{.*}} "-O3"
-// CHECK_O4: clang{{.*}} "-O3"
-// CHECK_O5: clang{{.*}} "-O5"
-// CHECK_Og: clang{{.*}} "-Og"
-// CHECK_Ofast: {{.*}}clang{{.*}} "-Ofast"
-// CHECK_O_DEFAULT: clang{{.*}} "-O3"
+// CHECK_O0: "-cc1"{{.*}} "-O0"
+// CHECK_O1: "-cc1"{{.*}} "-O1"
+// CHECK_O2: "-cc1"{{.*}} "-O2"
+// CHECK_O3: "-cc1"{{.*}} "-O3"
+// CHECK_O4: "-cc1"{{.*}} "-O3"
+// CHECK_O5: "-cc1"{{.*}} "-O5"
+// CHECK_Og: "-cc1"{{.*}} "-Og"
+// CHECK_Ofast: "-cc1"{{.*}} "-Ofast"
+// CHECK_O_DEFAULT: "-cc1"{{.*}} "-O3"
 
-// CHK-INC: clang{{.*}} "-cc1" {{.*}}"-finclude-default-header" "-fdeclare-opencl-builtins" {{.*}}"-x" "cl"
-// CHK-INC-NOT: clang{{.*}} "-cc1" {{.*}}"-finclude-default-header" "-fdeclare-opencl-builtins" {{.*}}"-x" "cpp-output"
+// CHK-INC: "-cc1" {{.*}}"-finclude-default-header" "-fdeclare-opencl-builtins" {{.*}}"-x" "cl"
+// CHK-INC-NOT: "-cc1" {{.*}}"-finclude-default-header" "-fdeclare-opencl-builtins" {{.*}}"-x" "cpp-output"
diff --git a/clang/test/Driver/amdgpu-toolchain.c b/clang/test/Driver/amdgpu-toolchain.c
index bea7ef7..c0a4354 100644
--- a/clang/test/Driver/amdgpu-toolchain.c
+++ b/clang/test/Driver/amdgpu-toolchain.c
@@ -1,16 +1,16 @@
-// RUN: %clang -### -target amdgcn--amdhsa -x assembler -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=AS_LINK %s
-// RUN: %clang -### -g -target amdgcn--amdhsa -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
-// RUN: %clang -### -target amdgcn-amd-amdpal -x assembler -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=AS_LINK %s
-// RUN: %clang -### -g -target amdgcn-amd-amdpal -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
-// RUN: %clang -### -target amdgcn-mesa-mesa3d -x assembler -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=AS_LINK %s
-// RUN: %clang -### -g -target amdgcn-mesa-mesa3d -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
+// RUN: %clang -### --target=amdgcn--amdhsa -x assembler -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=AS_LINK %s
+// RUN: %clang -### -g --target=amdgcn--amdhsa -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
+// RUN: %clang -### --target=amdgcn-amd-amdpal -x assembler -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=AS_LINK %s
+// RUN: %clang -### -g --target=amdgcn-amd-amdpal -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
+// RUN: %clang -### --target=amdgcn-mesa-mesa3d -x assembler -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=AS_LINK %s
+// RUN: %clang -### -g --target=amdgcn-mesa-mesa3d -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
 
-// AS_LINK: clang{{.*}} "-cc1as"
+// AS_LINK: "-cc1as"
 // AS_LINK: ld.lld{{.*}} "-shared"
 
 // DWARF_VER: "-dwarf-version=5"
 
-// RUN: %clang -### -target amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
+// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
 // RUN:   -flto %s 2>&1 | FileCheck -check-prefix=LTO %s
 // LTO: clang{{.*}} "-flto=full"
 // LTO: ld.lld{{.*}}
diff --git a/clang/test/Driver/ananas.c b/clang/test/Driver/ananas.c
index 4edc2a1..a6cabed 100644
--- a/clang/test/Driver/ananas.c
+++ b/clang/test/Driver/ananas.c
@@ -1,4 +1,4 @@
-// RUN: %clang -no-canonical-prefixes -target x86_64-unknown-ananas -static %s \
+// RUN: %clang --target=x86_64-unknown-ananas -static %s \
 // RUN:   --sysroot=%S/Inputs/ananas-tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-STATIC %s
 // CHECK-STATIC: ld{{.*}}" "-Bstatic"
@@ -8,10 +8,17 @@
 // CHECK-STATIC: crtend.o
 // CHECK-STATIC: crtn.o
 
-// RUN: %clang -no-canonical-prefixes -target x86_64-unknown-ananas -shared %s \
+// RUN: %clang --target=x86_64-unknown-ananas -shared %s \
 // RUN:   --sysroot=%S/Inputs/ananas-tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-SHARED %s
 // CHECK-SHARED: crti.o
 // CHECK-SHARED: crtbeginS.o
 // CHECK-SHARED: crtendS.o
 // CHECK-SHARED: crtn.o
+
+// -r suppresses default -l and crt*.o like -nostdlib.
+// RUN: %clang %s -### -o %t.o --target=x86_64-unknown-ananas -r 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-RELOCATABLE
+// CHECK-RELOCATABLE:     "-r"
+// CHECK-RELOCATABLE-NOT: "-l
+// CHECK-RELOCATABLE-NOT: {{.*}}crt{{[^.]+}}.o
diff --git a/clang/test/Driver/avr-link-nostdlib-nodefaultlibs.c b/clang/test/Driver/avr-link-nostdlib-nodefaultlibs.c
index 030ea84..660accd 100644
--- a/clang/test/Driver/avr-link-nostdlib-nodefaultlibs.c
+++ b/clang/test/Driver/avr-link-nostdlib-nodefaultlibs.c
@@ -1,5 +1,5 @@
-// RUN: %clang -### -target avr -no-canonical-prefixes -save-temps -mmcu=atmega328 -nostdlib %s 2>&1 | FileCheck %s
-// RUN: %clang -### -target avr -no-canonical-prefixes -save-temps -mmcu=atmega328 -nodefaultlibs %s 2>&1 | FileCheck %s
+// RUN: %clang -### --target=avr -save-temps -mmcu=atmega328 -nostdlib %s 2>&1 | FileCheck %s
+// RUN: %clang -### --target=avr -save-temps -mmcu=atmega328 -nodefaultlibs %s 2>&1 | FileCheck %s
 
 // nostdlib and nodefaultlibs programs should compile fine.
 
diff --git a/clang/test/Driver/avr-mmcu.c b/clang/test/Driver/avr-mmcu.c
index 4f0a340..6c13ab5 100644
--- a/clang/test/Driver/avr-mmcu.c
+++ b/clang/test/Driver/avr-mmcu.c
@@ -1,81 +1,81 @@
 // A test for the propagation of the -mmcu option to -cc1 and -cc1as
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=attiny11 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK0 %s
-// CHECK0: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "attiny11"
-// CHECK0: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "attiny11"
+// RUN: %clang -### --target=avr -mmcu=attiny11 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK0 %s
+// CHECK0: "-cc1" {{.*}} "-target-cpu" "attiny11"
+// CHECK0: "-cc1as" {{.*}} "-target-cpu" "attiny11"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=at90s2313 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK1 %s
-// CHECK1: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "at90s2313"
-// CHECK1: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "at90s2313"
+// RUN: %clang -### --target=avr -mmcu=at90s2313 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK1 %s
+// CHECK1: "-cc1" {{.*}} "-target-cpu" "at90s2313"
+// CHECK1: "-cc1as" {{.*}} "-target-cpu" "at90s2313"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=at90s8515 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK2 %s
-// CHECK2: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "at90s8515"
-// CHECK2: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "at90s8515"
+// RUN: %clang -### --target=avr -mmcu=at90s8515 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK2 %s
+// CHECK2: "-cc1" {{.*}} "-target-cpu" "at90s8515"
+// CHECK2: "-cc1as" {{.*}} "-target-cpu" "at90s8515"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=attiny13a -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK3 %s
-// CHECK3: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "attiny13a"
-// CHECK3: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "attiny13a"
+// RUN: %clang -### --target=avr -mmcu=attiny13a -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK3 %s
+// CHECK3: "-cc1" {{.*}} "-target-cpu" "attiny13a"
+// CHECK3: "-cc1as" {{.*}} "-target-cpu" "attiny13a"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=attiny88 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK4 %s
-// CHECK4: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "attiny88"
-// CHECK4: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "attiny88"
+// RUN: %clang -### --target=avr -mmcu=attiny88 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK4 %s
+// CHECK4: "-cc1" {{.*}} "-target-cpu" "attiny88"
+// CHECK4: "-cc1as" {{.*}} "-target-cpu" "attiny88"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=attiny88 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK5 %s
-// CHECK5: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "attiny88"
-// CHECK5: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "attiny88"
+// RUN: %clang -### --target=avr -mmcu=attiny88 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK5 %s
+// CHECK5: "-cc1" {{.*}} "-target-cpu" "attiny88"
+// CHECK5: "-cc1as" {{.*}} "-target-cpu" "attiny88"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atmega8u2 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK6 %s
-// CHECK6: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atmega8u2"
-// CHECK6: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atmega8u2"
+// RUN: %clang -### --target=avr -mmcu=atmega8u2 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK6 %s
+// CHECK6: "-cc1" {{.*}} "-target-cpu" "atmega8u2"
+// CHECK6: "-cc1as" {{.*}} "-target-cpu" "atmega8u2"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atmega8u2 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK7 %s
-// CHECK7: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atmega8u2"
-// CHECK7: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atmega8u2"
+// RUN: %clang -### --target=avr -mmcu=atmega8u2 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK7 %s
+// CHECK7: "-cc1" {{.*}} "-target-cpu" "atmega8u2"
+// CHECK7: "-cc1as" {{.*}} "-target-cpu" "atmega8u2"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atmega8a -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK8 %s
-// CHECK8: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atmega8a"
-// CHECK8: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atmega8a"
+// RUN: %clang -### --target=avr -mmcu=atmega8a -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK8 %s
+// CHECK8: "-cc1" {{.*}} "-target-cpu" "atmega8a"
+// CHECK8: "-cc1as" {{.*}} "-target-cpu" "atmega8a"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atmega8a -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK9 %s
-// CHECK9: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atmega8a"
-// CHECK9: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atmega8a"
+// RUN: %clang -### --target=avr -mmcu=atmega8a -save-temps %s 2>&1 | FileCheck -check-prefix=CHECK9 %s
+// CHECK9: "-cc1" {{.*}} "-target-cpu" "atmega8a"
+// CHECK9: "-cc1as" {{.*}} "-target-cpu" "atmega8a"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atmega16a -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKa %s
-// CHECKa: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atmega16a"
-// CHECKa: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atmega16a"
+// RUN: %clang -### --target=avr -mmcu=atmega16a -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKa %s
+// CHECKa: "-cc1" {{.*}} "-target-cpu" "atmega16a"
+// CHECKa: "-cc1as" {{.*}} "-target-cpu" "atmega16a"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atmega16a -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKb %s
-// CHECKb: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atmega16a"
-// CHECKb: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atmega16a"
+// RUN: %clang -### --target=avr -mmcu=atmega16a -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKb %s
+// CHECKb: "-cc1" {{.*}} "-target-cpu" "atmega16a"
+// CHECKb: "-cc1as" {{.*}} "-target-cpu" "atmega16a"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atmega128a -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKc %s
-// CHECKc: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atmega128a"
-// CHECKc: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atmega128a"
+// RUN: %clang -### --target=avr -mmcu=atmega128a -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKc %s
+// CHECKc: "-cc1" {{.*}} "-target-cpu" "atmega128a"
+// CHECKc: "-cc1as" {{.*}} "-target-cpu" "atmega128a"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atmega2560 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKd %s
-// CHECKd: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atmega2560"
-// CHECKd: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atmega2560"
+// RUN: %clang -### --target=avr -mmcu=atmega2560 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKd %s
+// CHECKd: "-cc1" {{.*}} "-target-cpu" "atmega2560"
+// CHECKd: "-cc1as" {{.*}} "-target-cpu" "atmega2560"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=attiny10 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKe %s
-// CHECKe: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "attiny10"
-// CHECKe: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "attiny10"
+// RUN: %clang -### --target=avr -mmcu=attiny10 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKe %s
+// CHECKe: "-cc1" {{.*}} "-target-cpu" "attiny10"
+// CHECKe: "-cc1as" {{.*}} "-target-cpu" "attiny10"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atxmega16a4 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKf %s
-// CHECKf: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atxmega16a4"
-// CHECKf: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atxmega16a4"
+// RUN: %clang -### --target=avr -mmcu=atxmega16a4 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKf %s
+// CHECKf: "-cc1" {{.*}} "-target-cpu" "atxmega16a4"
+// CHECKf: "-cc1as" {{.*}} "-target-cpu" "atxmega16a4"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atxmega64b1 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKg %s
-// CHECKg: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atxmega64b1"
-// CHECKg: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atxmega64b1"
+// RUN: %clang -### --target=avr -mmcu=atxmega64b1 -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKg %s
+// CHECKg: "-cc1" {{.*}} "-target-cpu" "atxmega64b1"
+// CHECKg: "-cc1as" {{.*}} "-target-cpu" "atxmega64b1"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atxmega64a1u -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKh %s
-// CHECKh: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atxmega64a1u"
-// CHECKh: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atxmega64a1u"
+// RUN: %clang -### --target=avr -mmcu=atxmega64a1u -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKh %s
+// CHECKh: "-cc1" {{.*}} "-target-cpu" "atxmega64a1u"
+// CHECKh: "-cc1as" {{.*}} "-target-cpu" "atxmega64a1u"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atxmega128a3u -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKj %s
-// CHECKj: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atxmega128a3u"
-// CHECKj: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atxmega128a3u"
+// RUN: %clang -### --target=avr -mmcu=atxmega128a3u -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKj %s
+// CHECKj: "-cc1" {{.*}} "-target-cpu" "atxmega128a3u"
+// CHECKj: "-cc1as" {{.*}} "-target-cpu" "atxmega128a3u"
 
-// RUN: %clang -### -target avr -no-canonical-prefixes -mmcu=atxmega128a4u -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKi %s
-// CHECKi: clang{{.*}} "-cc1" {{.*}} "-target-cpu" "atxmega128a4u"
-// CHECKi: clang{{.*}} "-cc1as" {{.*}} "-target-cpu" "atxmega128a4u"
+// RUN: %clang -### --target=avr -mmcu=atxmega128a4u -save-temps %s 2>&1 | FileCheck -check-prefix=CHECKi %s
+// CHECKi: "-cc1" {{.*}} "-target-cpu" "atxmega128a4u"
+// CHECKi: "-cc1as" {{.*}} "-target-cpu" "atxmega128a4u"
diff --git a/clang/test/Driver/avr-toolchain.c b/clang/test/Driver/avr-toolchain.c
index 89226a6..5bdd644 100644
--- a/clang/test/Driver/avr-toolchain.c
+++ b/clang/test/Driver/avr-toolchain.c
@@ -1,8 +1,8 @@
 // UNSUPPORTED: system-windows
 // A basic clang -cc1 command-line.
 
-// RUN: %clang %s -### --target=avr --sysroot=%S/Inputs/basic_avr_tree -resource-dir=%S/Inputs/resource_dir 2>&1 | FileCheck --check-prefix=CHECK1 %s
-// CHECK1: clang{{.*}} "-cc1" "-triple" "avr"
+// RUN: %clang -### %s --target=avr --sysroot=%S/Inputs/basic_avr_tree -resource-dir=%S/Inputs/resource_dir 2>&1 | FileCheck --check-prefix=CHECK1 %s
+// CHECK1: "-cc1" "-triple" "avr"
 // CHECK1-SAME: "-resource-dir" "[[RESOURCE:[^"]+]]"
 // CHECK1-SAME: "-isysroot" "[[SYSROOT:[^"]+/basic_avr_tree]]"
 // CHECK1-SAME: "-internal-isystem"
@@ -12,28 +12,28 @@
 // CHECK1-SAME: "-o" "a.out"
 // CHECK1-SAME: {{^}} "--gc-sections"
 
-// RUN: %clang %s -### --target=avr --sysroot=%S/Inputs/basic_avr_tree_2/opt/local -S 2>&1 | FileCheck --check-prefix=CHECK2 %s
-// CHECK2: clang{{.*}} "-cc1" "-triple" "avr"
+// RUN: %clang -### %s --target=avr --sysroot=%S/Inputs/basic_avr_tree_2/opt/local -S 2>&1 | FileCheck --check-prefix=CHECK2 %s
+// CHECK2: "-cc1" "-triple" "avr"
 // CHECK2-SAME: "-isysroot" "[[SYSROOT:[^"]+/basic_avr_tree_2/opt/local]]"
 // CHECK2-SAME: "-internal-isystem"
 // CHECK2-SAME: {{^}} "[[SYSROOT]]/lib/gcc/avr/10.3.0/../../../../avr/include"
 
-// RUN: %clang %s -### --target=avr --sysroot=%S/Inputs/basic_avr_tree_2 -S 2>&1 | FileCheck --check-prefix=CHECK3 %s
-// CHECK3: clang{{.*}} "-cc1" "-triple" "avr"
+// RUN: %clang -### %s --target=avr --sysroot=%S/Inputs/basic_avr_tree_2 -S 2>&1 | FileCheck --check-prefix=CHECK3 %s
+// CHECK3: "-cc1" "-triple" "avr"
 // CHECK3-SAME: "-isysroot" "[[SYSROOT:[^"]+/basic_avr_tree_2]]"
 // CHECK3-SAME: "-internal-isystem"
 // CHECK3-SAME: {{^}} "[[SYSROOT]]/usr/avr/include"
 
-// RUN: %clang %s -### --target=avr 2>&1 | FileCheck -check-prefix=CC1 %s
-// CC1: clang{{.*}} "-cc1" "-triple" "avr" {{.*}} "-fno-use-init-array" "-fno-use-cxa-atexit"
+// RUN: %clang -### %s --target=avr 2>&1 | FileCheck -check-prefix=CC1 %s
+// CC1: "-cc1" "-triple" "avr" {{.*}} "-fno-use-init-array" "-fno-use-cxa-atexit"
 
-// RUN: %clang %s -### --target=avr -fuse-init-array -fuse-cxa-atexit 2>&1 | FileCheck -check-prefix=CHECK4 %s
-// CHECK4: clang{{.*}} "-cc1" "-triple" "avr"
+// RUN: %clang -### %s --target=avr -fuse-init-array -fuse-cxa-atexit 2>&1 | FileCheck -check-prefix=CHECK4 %s
+// CHECK4: "-cc1" "-triple" "avr"
 // CHECK4-NOT: "-fno-use-init-array"
 // CHECK4-NOT: "-fno-use-cxa-atexit"
 
-// RUN: %clang %s -### --target=avr --sysroot=%S/Inputs/basic_avr_tree 2>&1 -nostdinc | FileCheck --check-prefix=NOSTDINC %s
-// RUN: %clang %s -### --target=avr --sysroot=%S/Inputs/basic_avr_tree 2>&1 -nostdlibinc | FileCheck --check-prefix=NOSTDINC %s
+// RUN: %clang -### %s --target=avr --sysroot=%S/Inputs/basic_avr_tree 2>&1 -nostdinc | FileCheck --check-prefix=NOSTDINC %s
+// RUN: %clang -### %s --target=avr --sysroot=%S/Inputs/basic_avr_tree 2>&1 -nostdlibinc | FileCheck --check-prefix=NOSTDINC %s
 // NOSTDINC-NOT: "-internal-isystem" {{".*avr/include"}}
 
 // RUN: %clang -### --target=avr --sysroot=%S/Inputs/basic_avr_tree -mmcu=atmega328 %s 2>&1 | FileCheck --check-prefix=NOWARN %s
diff --git a/clang/test/Driver/cc-log-diagnostics.c b/clang/test/Driver/cc-log-diagnostics.c
index c6c0afa..22d3389 100644
--- a/clang/test/Driver/cc-log-diagnostics.c
+++ b/clang/test/Driver/cc-log-diagnostics.c
@@ -1,7 +1,7 @@
 // RUN: rm -f %t.log
 // RUN: env RC_DEBUG_OPTIONS=1 \
 // RUN:     CC_LOG_DIAGNOSTICS=1 CC_LOG_DIAGNOSTICS_FILE=%t.log \
-// RUN: %clang -Wfoobar -no-canonical-prefixes -target x86_64-apple-darwin10 -fsyntax-only %s
+// RUN: %clang -Wfoobar --target=x86_64-apple-darwin10 -fsyntax-only %s
 // RUN: FileCheck %s < %t.log
 
 int f0(void) {}
@@ -10,7 +10,7 @@ int f0(void) {}
 // CHECK:   <key>main-file</key>
 // CHECK:   <string>{{.*}}cc-log-diagnostics.c</string>
 // CHECK:   <key>dwarf-debug-flags</key>
-// CHECK:   <string>{{.*}}clang{{.*}}-fsyntax-only{{.*}}</string>
+// CHECK:   <string>{{.*}}-Wfoobar{{.*}}-fsyntax-only{{.*}}</string>
 // CHECK:   <key>diagnostics</key>
 // CHECK:   <array>
 // CHECK:     <dict>
diff --git a/clang/test/Driver/cc-print-options.c b/clang/test/Driver/cc-print-options.c
index dc7f4a3..5a17810 100644
--- a/clang/test/Driver/cc-print-options.c
+++ b/clang/test/Driver/cc-print-options.c
@@ -1,8 +1,8 @@
 // RUN: env CC_PRINT_OPTIONS=1 \
 // RUN:     CC_PRINT_OPTIONS_FILE=%t.log \
-// RUN: %clang -no-canonical-prefixes -S -o %t.s %s
+// RUN: %clang -S -o %t.s %s
 // RUN: FileCheck %s < %t.log
 
 // CHECK: [Logging clang options]
-// CHECK: {{.*}}clang{{.*}}"-S"
+// CHECK: {{.*}} "-S"
 
diff --git a/clang/test/Driver/color-diagnostics.c b/clang/test/Driver/color-diagnostics.c
index 0d38a6b..ff4890c 100644
--- a/clang/test/Driver/color-diagnostics.c
+++ b/clang/test/Driver/color-diagnostics.c
@@ -1,31 +1,31 @@
 // RUN: %clang -fcolor-diagnostics -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-CD %s
-// CHECK-CD: clang{{.*}}" "-fcolor-diagnostics"
+// CHECK-CD: "-cc1"{{.*}} "-fcolor-diagnostics"
 
 // RUN: %clang -fno-color-diagnostics -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-NCD %s
-// CHECK-NCD-NOT: clang{{.*}}" "-fcolor-diagnostics"
+// CHECK-NCD-NOT: "-cc1"{{.*}} "-fcolor-diagnostics"
 
 // RUN: %clang -fdiagnostics-color -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-DC %s
-// CHECK-DC: clang{{.*}}" "-fcolor-diagnostics"
+// CHECK-DC: "-cc1"{{.*}} "-fcolor-diagnostics"
 
 // RUN: %clang -fno-diagnostics-color -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-NDC %s
-// CHECK-NDC-NOT: clang{{.*}}" "-fcolor-diagnostics"
+// CHECK-NDC-NOT: "-cc1"{{.*}} "-fcolor-diagnostics"
 
 // RUN: %clang -fdiagnostics-color=always -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-DCE_A %s
-// CHECK-DCE_A: clang{{.*}}" "-fcolor-diagnostics"
+// CHECK-DCE_A: "-cc1"{{.*}} "-fcolor-diagnostics"
 
 // RUN: %clang -fdiagnostics-color=never -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-DCE_N %s
-// CHECK-DCE_N-NOT: clang{{.*}}" "-fcolor-diagnostics"
+// CHECK-DCE_N-NOT: "-cc1"{{.*}} "-fcolor-diagnostics"
 
 // The test doesn't run in a PTY, so "auto" defaults to off.
 // RUN: %clang -fdiagnostics-color=auto -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-DCE_AUTO %s
-// CHECK-DCE_AUTO-NOT: clang{{.*}}" "-fcolor-diagnostics"
+// CHECK-DCE_AUTO-NOT: "-cc1"{{.*}} "-fcolor-diagnostics"
 
 // RUN: %clang -fdiagnostics-color=foo -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-DCE_FOO %s
@@ -34,24 +34,24 @@
 // Check that the last flag wins.
 // RUN: %clang -fno-color-diagnostics -fdiagnostics-color -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-NCD_DC_S %s
-// CHECK-NCD_DC_S: clang{{.*}}" "-fcolor-diagnostics"
+// CHECK-NCD_DC_S: "-cc1"{{.*}} "-fcolor-diagnostics"
 
 // RUN: %clang -fcolor-diagnostics -fno-diagnostics-color -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-CD_NDC_S %s
-// CHECK-CD_NDC_S-NOT: clang{{.*}}" "-fcolor-diagnostics"
+// CHECK-CD_NDC_S-NOT: "-cc1"{{.*}} "-fcolor-diagnostics"
 
 // RUN: %clang -fdiagnostics-color -fno-color-diagnostics -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-DC_NCD_S %s
-// CHECK-DC_NCD_S-NOT: clang{{.*}}" "-fcolor-diagnostics"
+// CHECK-DC_NCD_S-NOT: "-cc1"{{.*}} "-fcolor-diagnostics"
 
 // RUN: %clang -fno-diagnostics-color -fcolor-diagnostics -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-NDC_CD_S %s
-// CHECK-NDC_CD_S: clang{{.*}}" "-fcolor-diagnostics"
+// CHECK-NDC_CD_S: "-cc1"{{.*}} "-fcolor-diagnostics"
 
 // RUN: %clang -fcolor-diagnostics -fdiagnostics-color=auto -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-CD_DCE_AUTO_S %s
-// CHECK-CD_DCE_AUTO_S-NOT: clang{{.*}}" "-fcolor-diagnostics"
+// CHECK-CD_DCE_AUTO_S-NOT: "-cc1"{{.*}} "-fcolor-diagnostics"
 
 // RUN: %clang -fansi-escape-codes -### -c %s 2>&1 \
 // RUN:     | FileCheck --check-prefix=CHECK-AEC %s
-// CHECK-AEC: clang{{.*}}" "-fansi-escape-codes"
+// CHECK-AEC: "-cc1"{{.*}} "-fansi-escape-codes"
diff --git a/clang/test/Driver/compilation_database.c b/clang/test/Driver/compilation_database.c
index da4d1c2..19224d2 100644
--- a/clang/test/Driver/compilation_database.c
+++ b/clang/test/Driver/compilation_database.c
@@ -1,9 +1,9 @@
 // RUN: mkdir -p %t.workdir && cd %t.workdir
-// RUN: %clang -fintegrated-as -MD -MP --sysroot=somewhere -c -x c %s -xc++ %s -Wall -MJ - -no-canonical-prefixes 2>&1 | FileCheck %s
-// RUN: not %clang -c -x c %s -MJ %s/non-existant -no-canonical-prefixes 2>&1 | FileCheck --check-prefix=ERROR %s
+// RUN: %clang -no-canonical-prefixes -fintegrated-as -MD -MP --sysroot=somewhere -c -x c %s -xc++ %s -Wall -MJ - 2>&1 | FileCheck %s
+// RUN: not %clang -no-canonical-prefixes -c -x c %s -MJ %s/non-existant 2>&1 | FileCheck --check-prefix=ERROR %s
 
-// CHECK: { "directory": "{{[^"]*}}workdir",  "file": "[[SRC:[^"]+[/|\\]compilation_database.c]]", "output": "compilation_database.o", "arguments": ["{{[^"]*}}clang{{[^"]*}}", "-xc", "[[SRC]]", "-fintegrated-as", "--sysroot=somewhere", "-c", "-Wall",{{.*}} "--target={{[^"]+}}"]},
-// CHECK: { "directory": "{{.*}}",  "file": "[[SRC:[^"]+[/|\\]compilation_database.c]]", "output": "compilation_database.o", "arguments": ["{{[^"]*}}clang{{[^"]*}}", "-xc++", "[[SRC]]", "-fintegrated-as", "--sysroot=somewhere", "-c", "-Wall",{{.*}} "--target={{[^"]+}}"]},
+// CHECK: { "directory": "{{[^"]*}}workdir",  "file": "[[SRC:[^"]+[/|\\]compilation_database.c]]", "output": "compilation_database.o", "arguments": ["{{[^"]*}}clang{{[^"]*}}", "-xc", "[[SRC]]", "-no-canonical-prefixes", "-fintegrated-as", "--sysroot=somewhere", "-c", "-Wall",{{.*}} "--target={{[^"]+}}"]},
+// CHECK: { "directory": "{{.*}}",  "file": "[[SRC:[^"]+[/|\\]compilation_database.c]]", "output": "compilation_database.o", "arguments": ["{{[^"]*}}clang{{[^"]*}}", "-xc++", "[[SRC]]", "-no-canonical-prefixes", "-fintegrated-as", "--sysroot=somewhere", "-c", "-Wall",{{.*}} "--target={{[^"]+}}"]},
 // ERROR: error: compilation database '{{.*}}/non-existant' could not be opened:
 
 int main(void) {
diff --git a/clang/test/Driver/compiler-rt-unwind.c b/clang/test/Driver/compiler-rt-unwind.c
index c17c4a9..2772bc8 100644
--- a/clang/test/Driver/compiler-rt-unwind.c
+++ b/clang/test/Driver/compiler-rt-unwind.c
@@ -1,21 +1,21 @@
 // General tests that the driver handles combinations of --rtlib=XXX and
 // --unwindlib=XXX properly.
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=libgcc --unwindlib=platform \
 // RUN:     --gcc-toolchain="" -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=RTLIB-GCC %s
 // RTLIB-GCC: "{{.*}}lgcc"
 // RTLIB-GCC: "{{.*}}lgcc_s"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=libgcc --unwindlib=libunwind \
 // RUN:     --gcc-toolchain="" -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=RTLIB-GCC-UNWINDLIB-COMPILER-RT %s
 // RTLIB-GCC-UNWINDLIB-COMPILER-RT: "{{.*}}lgcc"
 // RTLIB-GCC-UNWINDLIB-COMPILER-RT: "{{.*}}l:libunwind.so"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=libgcc --unwindlib=libunwind \
 // RUN:     -static-libgcc \
 // RUN:     --gcc-toolchain="" -resource-dir=%S/Inputs/resource_dir \
@@ -23,33 +23,33 @@
 // RTLIB-GCC-STATIC-UNWINDLIB-COMPILER-RT: "{{.*}}lgcc"
 // RTLIB-GCC-STATIC-UNWINDLIB-COMPILER-RT: "{{.*}}l:libunwind.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1   \
+// RUN: %clang -### %s 2>&1   \
 // RUN:     --target=x86_64-unknown-linux -rtlib=compiler-rt \
 // RUN:     --gcc-toolchain="" -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=RTLIB-COMPILER-RT %s
 // RTLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins-x86_64.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1   \
+// RUN: %clang -### %s 2>&1   \
 // RUN:     --target=x86_64-unknown-linux -rtlib=compiler-rt --unwindlib=libgcc \
 // RUN:     --gcc-toolchain="" -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=RTLIB-COMPILER-RT-UNWINDLIB-GCC %s
 // RTLIB-COMPILER-RT-UNWINDLIB-GCC: "{{.*}}libclang_rt.builtins-x86_64.a"
 // RTLIB-COMPILER-RT-UNWINDLIB-GCC: "{{.*}}lgcc_s"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1              \
+// RUN: %clang -### %s 2>&1              \
 // RUN:     --target=x86_64-unknown-linux -rtlib=compiler-rt --unwindlib=libgcc \
 // RUN:     -static --gcc-toolchain="" -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=RTLIB-COMPILER-RT-UNWINDLIB-GCC-STATIC %s
 // RTLIB-COMPILER-RT-UNWINDLIB-GCC-STATIC: "{{.*}}libclang_rt.builtins-x86_64.a"
 // RTLIB-COMPILER-RT-UNWINDLIB-GCC-STATIC: "{{.*}}lgcc_eh"
 //
-// RUN: not %clang -no-canonical-prefixes %s -o %t.o 2> %t.err              \
+// RUN: not %clang %s 2> %t.err              \
 // RUN:     --target=x86_64-unknown-linux -rtlib=libgcc --unwindlib=libunwind \
 // RUN:     --gcc-toolchain="" -resource-dir=%S/Inputs/resource_dir \
 // RUN: FileCheck --input-file=%t.err --check-prefix=RTLIB-GCC-UNWINDLIB-COMPILER_RT %s
 // RTLIB-GCC-UNWINDLIB-COMPILER_RT: "{{[.|\\\n]*}}--rtlib=libgcc requires --unwindlib=libgcc"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=x86_64-w64-mingw32 -rtlib=compiler-rt --unwindlib=libunwind \
 // RUN:     -shared-libgcc \
 // RUN:     --gcc-toolchain="" \
@@ -57,7 +57,7 @@
 // MINGW-RTLIB-COMPILER-RT-SHARED-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins-x86_64.a"
 // MINGW-RTLIB-COMPILER-RT-SHARED-UNWINDLIB-COMPILER-RT: "{{.*}}l:libunwind.dll.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=x86_64-w64-mingw32 -rtlib=compiler-rt --unwindlib=libunwind \
 // RUN:     -static-libgcc \
 // RUN:     --gcc-toolchain="" \
@@ -65,11 +65,11 @@
 // MINGW-RTLIB-COMPILER-RT-STATIC-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins-x86_64.a"
 // MINGW-RTLIB-COMPILER-RT-STATIC-UNWINDLIB-COMPILER-RT: "{{.*}}l:libunwind.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=x86_64-w64-mingw32 -rtlib=compiler-rt --unwindlib=libunwind \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=MINGW-RTLIB-COMPILER-RT-UNWINDLIB-COMPILER-RT %s
-// RUN: %clangxx -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clangxx -### %s 2>&1 \
 // RUN:     --target=x86_64-w64-mingw32 -rtlib=compiler-rt --unwindlib=libunwind \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=MINGW-RTLIB-COMPILER-RT-UNWINDLIB-COMPILER-RT %s
diff --git a/clang/test/Driver/config-file3.c b/clang/test/Driver/config-file3.c
index fc5c286..8efe1e5 100644
--- a/clang/test/Driver/config-file3.c
+++ b/clang/test/Driver/config-file3.c
@@ -9,7 +9,7 @@
 // RUN: echo "@subdir/cfg-s2" > %t/workdir/cfg-1
 // RUN: echo "-Wundefined-var-template" > %t/workdir/subdir/cfg-s2
 //
-// RUN: ( cd %t && %clang --config workdir/cfg-1 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-REL )
+// RUN: ( cd %t && %clang --config workdir/cfg-1 -c -### %s 2>&1 | FileCheck %s -check-prefix CHECK-REL )
 //
 // CHECK-REL: Configuration file: {{.*}}/workdir/cfg-1
 // CHECK-REL: -Wundefined-var-template
@@ -21,7 +21,7 @@
 // RUN: ln -s %clang %t/testdmode/qqq-clang-g++
 // RUN: echo "-Wundefined-func-template" > %t/testdmode/qqq-clang-g++.cfg
 // RUN: echo "-Werror" > %t/testdmode/qqq.cfg
-// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix FULL-NAME
+// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes -### %s 2>&1 | FileCheck %s -check-prefix FULL-NAME
 //
 // FULL-NAME: Configuration file: {{.*}}/testdmode/qqq-clang-g++.cfg
 // FULL-NAME: -Wundefined-func-template
@@ -31,20 +31,20 @@
 // (As the clang executable and symlink are in different directories, this
 // requires specifying the path via --config-*-dir= though.)
 //
-// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir=%t/testdmode -c %s -### 2>&1 | FileCheck %s -check-prefix SYMLINK
+// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir=%t/testdmode -c -### %s 2>&1 | FileCheck %s -check-prefix SYMLINK
 //
 // SYMLINK: Configuration file: {{.*}}/testdmode/qqq-clang-g++.cfg
 //
 //--- File specified by --config overrides config inferred from clang executable.
 //
-// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config i386-qqq -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-EXPLICIT
+// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config i386-qqq -c -no-canonical-prefixes -### %s 2>&1 | FileCheck %s -check-prefix CHECK-EXPLICIT
 //
 // CHECK-EXPLICIT: Configuration file: {{.*}}/Inputs/config/i386-qqq.cfg
 //
 //--- Invocation qqq-clang-g++ tries to find config file qqq.cfg if qqq-clang-g++.cfg is not found.
 //
 // RUN: rm %t/testdmode/qqq-clang-g++.cfg
-// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix SHORT-NAME
+// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes -### %s 2>&1 | FileCheck %s -check-prefix SHORT-NAME
 //
 // SHORT-NAME: Configuration file: {{.*}}/testdmode/qqq.cfg
 // SHORT-NAME: -Werror
@@ -56,7 +56,7 @@
 // RUN: mkdir %t/testbin
 // RUN: ln -s %clang %t/testbin/clang
 // RUN: echo "-Werror" > %t/testbin/aaa.cfg
-// RUN: %t/testbin/clang --config-system-dir= --config-user-dir= --config aaa.cfg -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-BIN
+// RUN: %t/testbin/clang --config-system-dir= --config-user-dir= --config aaa.cfg -c -no-canonical-prefixes -### %s 2>&1 | FileCheck %s -check-prefix CHECK-BIN
 //
 // CHECK-BIN: Configuration file: {{.*}}/testbin/aaa.cfg
 // CHECK-BIN: -Werror
@@ -71,7 +71,7 @@
 // RUN: ln -s %clang %t/testreload/x86_64-clang-g++
 // RUN: echo "-Wundefined-func-template" > %t/testreload/i386-clang-g++.cfg
 // RUN: echo "-Werror" > %t/testreload/i386.cfg
-// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD
+// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes -### %s 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD
 //
 // CHECK-RELOAD: Configuration file: {{.*}}/testreload/i386-clang-g++.cfg
 // CHECK-RELOAD: -Wundefined-func-template
@@ -79,24 +79,24 @@
 
 //--- If config file is specified by --config and its name does not start with architecture, it is used without reloading.
 //
-// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1a
+// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -m32 -no-canonical-prefixes -### %s 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1a
 //
 // CHECK-RELOAD1a: Configuration file: {{.*}}/Inputs/config-3.cfg
 //
-// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -target i386 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1b
+// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c --target=i386 -no-canonical-prefixes -### %s 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1b
 //
 // CHECK-RELOAD1b: Configuration file: {{.*}}/Inputs/config-3.cfg
 
 //--- If config file is specified by --config and its name starts with architecture, it is reloaded.
 //
-// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config x86_64-qqq -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1c
+// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config x86_64-qqq -c -m32 -no-canonical-prefixes -### %s 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1c
 //
 // CHECK-RELOAD1c: Configuration file: {{.*}}/Inputs/config/i386-qqq.cfg
 
 //--- x86_64-clang-g++ tries to find config i386.cfg if i386-clang-g++.cfg is not found.
 //
 // RUN: rm %t/testreload/i386-clang-g++.cfg
-// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1d
+// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes -### %s 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1d
 //
 // CHECK-RELOAD1d: Configuration file: {{.*}}/testreload/i386.cfg
 // CHECK-RELOAD1d: -Werror
diff --git a/clang/test/Driver/constructors.c b/clang/test/Driver/constructors.c
index e5d453d..1561bc4 100644
--- a/clang/test/Driver/constructors.c
+++ b/clang/test/Driver/constructors.c
@@ -4,95 +4,95 @@
 // CHECK-INIT-ARRAY-NOT: -fno-use-init-array
 // CHECK-NO-INIT-ARRAY: -fno-use-init-array
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     --sysroot=%S/Inputs/resource_dir \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     --sysroot=%S/Inputs/fake_install_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
 // RUN:     -fno-use-init-array \
-// RUN:     -target i386-unknown-linux \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     --sysroot=%S/Inputs/fake_install_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
 // RUN:     -fno-use-init-array -fuse-init-array \
-// RUN:     -target i386-unknown-linux \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     --sysroot=%S/Inputs/fake_install_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
 // RUN:     -fuse-init-array \
-// RUN:     -target i386-unknown-linux \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target arm-unknown-linux-androideabi \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=arm-unknown-linux-androideabi \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target mipsel-unknown-linux-android \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=mipsel-unknown-linux-android \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux-android \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux-android \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target aarch64-none-linux-gnu \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=aarch64-none-linux-gnu \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target aarch64-none-none-eabi \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=aarch64-none-none-eabi \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target arm64-none-linux-gnu \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=arm64-none-linux-gnu \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target arm64-none-none-eabi \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=arm64-none-none-eabi \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-freebsd11 \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-freebsd11 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-INIT-ARRAY %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-freebsd12 \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-freebsd12 \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1        \
-// RUN:     -target sparc-sun-solaris2.11 \
+// RUN: %clang -### %s -fsyntax-only 2>&1        \
+// RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1        \
-// RUN:     -target i386-pc-solaris2.11 \
+// RUN: %clang -### %s -fsyntax-only 2>&1        \
+// RUN:     --target=i386-pc-solaris2.11 \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
diff --git a/clang/test/Driver/coverage-ld.c b/clang/test/Driver/coverage-ld.c
index c15531f..7d6a48b 100644
--- a/clang/test/Driver/coverage-ld.c
+++ b/clang/test/Driver/coverage-ld.c
@@ -1,13 +1,13 @@
 // Test coverage ld flags.
 //
-// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \
-// RUN:     -target i386-unknown-linux -fprofile-arcs \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fprofile-arcs \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LINUX-I386 %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \
-// RUN:     -target i386-unknown-linux --coverage \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux --coverage \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LINUX-I386 %s
@@ -17,8 +17,8 @@
 // CHECK-LINUX-I386-NOT: "-u__llvm_profile_runtime"
 // CHECK-LINUX-I386: "-lc"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux --coverage -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux --coverage -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LINUX-X86-64 %s
@@ -26,8 +26,8 @@
 // CHECK-LINUX-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-LINUX-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}linux{{/|\\\\}}libclang_rt.profile-x86_64.a" {{.*}} "-lc"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-freebsd --coverage -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-freebsd --coverage -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_freebsd64_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-FREEBSD-X86-64 %s
@@ -35,8 +35,8 @@
 // CHECK-FREEBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-FREEBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}freebsd{{/|\\\\}}libclang_rt.profile-x86_64.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target arm-linux-androideabi --coverage -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=arm-linux-androideabi --coverage -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-ARM %s
diff --git a/clang/test/Driver/csky-toolchain.c b/clang/test/Driver/csky-toolchain.c
index 77de511..5762f56 100644
--- a/clang/test/Driver/csky-toolchain.c
+++ b/clang/test/Driver/csky-toolchain.c
@@ -1,17 +1,17 @@
 // UNSUPPORTED: system-windows
 // A basic clang -cc1 command-line, and simple environment check.
 
-// RUN: %clang %s -### -no-canonical-prefixes -target csky 2>&1 | FileCheck -check-prefix=CC1 %s
-// CC1: clang{{.*}} "-cc1" "-triple" "csky"
+// RUN: %clang -### %s --target=csky 2>&1 | FileCheck -check-prefix=CC1 %s
+// CC1: "-cc1" "-triple" "csky"
 
 // Test interaction with -fuse-ld=lld, if lld is available.
-// RUN: %clang %s -### -target csky -fuse-ld=lld 2>&1 | FileCheck -check-prefix=LLD %s
+// RUN: %clang -### %s --target=csky -fuse-ld=lld 2>&1 | FileCheck -check-prefix=LLD %s
 // LLD: {{(error: invalid linker name in argument '-fuse-ld=lld')|(ld.lld)}}
 
 // In the below tests, --rtlib=platform is used so that the driver ignores
 // the configure-time CLANG_DEFAULT_RTLIB option when choosing the runtime lib
 
-// RUN: %clang %s -### -fuse-ld=ld -no-pie --target=csky-unknown-linux-gnu --rtlib=platform  \
+// RUN: %clang -### %s -fuse-ld=ld -no-pie --target=csky-unknown-linux-gnu --rtlib=platform  \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_csky_linux_sdk  2>&1 | FileCheck -check-prefix=C-CSKY-LINUX-MULTI %s
 
 // C-CSKY-LINUX-MULTI: "{{.*}}/Inputs/multilib_csky_linux_sdk/lib/gcc/csky-linux-gnuabiv2/6.3.0/../../..{{/|\\\\}}..{{/|\\\\}}csky-linux-gnuabiv2/bin{{/|\\\\}}ld"
@@ -27,7 +27,7 @@
 // C-CSKY-LINUX-MULTI: "-L{{.*}}/Inputs/multilib_csky_linux_sdk/lib/gcc/csky-linux-gnuabiv2/6.3.0/../../..{{/|\\\\}}..{{/|\\\\}}csky-linux-gnuabiv2/libc/lib"
 // C-CSKY-LINUX-MULTI: "-L{{.*}}/Inputs/multilib_csky_linux_sdk/lib/gcc/csky-linux-gnuabiv2/6.3.0/../../..{{/|\\\\}}..{{/|\\\\}}csky-linux-gnuabiv2/libc/usr/lib"
 
-// RUN: %clang %s -### -fuse-ld=ld -fno-pic -no-pie --target=csky-unknown-linux-gnu --rtlib=platform -march=ck860v \
+// RUN: %clang -### %s -fuse-ld=ld -fno-pic -no-pie --target=csky-unknown-linux-gnu --rtlib=platform -march=ck860v \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_csky_linux_sdk 2>&1 | FileCheck -check-prefix=C-CSKY-LINUX-CK860V %s
 
 // C-CSKY-LINUX-CK860V: "{{.*}}/Inputs/multilib_csky_linux_sdk/lib/gcc/csky-linux-gnuabiv2/6.3.0/../../..{{/|\\\\}}..{{/|\\\\}}csky-linux-gnuabiv2/bin{{/|\\\\}}ld"
diff --git a/clang/test/Driver/cuda-detect.cu b/clang/test/Driver/cuda-detect.cu
index c11bc28..9cc1465d 100644
--- a/clang/test/Driver/cuda-detect.cu
+++ b/clang/test/Driver/cuda-detect.cu
@@ -146,18 +146,18 @@
 // RUN:     -check-prefix NOCUDAINC
 
 // Verify that C++ include paths are passed for both host and device frontends.
-// RUN: %clang -### -no-canonical-prefixes -target x86_64-linux-gnu %s \
+// RUN: %clang -### --target=x86_64-linux-gnu %s \
 // RUN: --stdlib=libstdc++ --sysroot=%S/Inputs/ubuntu_14.04_multiarch_tree2 \
 // RUN: --gcc-toolchain="" 2>&1 \
 // RUN: | FileCheck %s --check-prefix CHECK-CXXINCLUDE
 
 // Verify that CUDA SDK version is propagated to the CC1 compilations.
-// RUN: %clang -### -v -target x86_64-linux-gnu --cuda-gpu-arch=sm_50 \
+// RUN: %clang -### -v --target=x86_64-linux-gnu --cuda-gpu-arch=sm_50 \
 // RUN:   --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix CUDA80
 
 // Verify that if no version file is found, we report the default of 7.0.
-// RUN: %clang -### -v -target x86_64-linux-gnu --cuda-gpu-arch=sm_50 \
+// RUN: %clang -### -v --target=x86_64-linux-gnu --cuda-gpu-arch=sm_50 \
 // RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix CUDA70
 
@@ -186,9 +186,9 @@
 // because we must search the cuda include directory first.
 // CUDAINC-SAME: "-internal-externc-isystem"
 // COMMON-SAME: "-x" "cuda"
-// CHECK-CXXINCLUDE: clang{{.*}} "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CHECK-CXXINCLUDE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // CHECK-CXXINCLUDE-SAME: {{.*}}"-internal-isystem" "{{.+}}/include/c++/4.8"
-// CHECK-CXXINCLUDE: clang{{.*}} "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CHECK-CXXINCLUDE: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
 // CHECK-CXXINCLUDE-SAME: {{.*}}"-internal-isystem" "{{.+}}/include/c++/4.8"
 // CHECK-CXXINCLUDE: ld{{.*}}"
 
diff --git a/clang/test/Driver/cuda-external-tools.cu b/clang/test/Driver/cuda-external-tools.cu
index f8e2edd..198443e 100644
--- a/clang/test/Driver/cuda-external-tools.cu
+++ b/clang/test/Driver/cuda-external-tools.cu
@@ -5,111 +5,111 @@
 // REQUIRES: nvptx-registered-target
 
 // Regular compiles with -O{0,1,2,3,4,fast}.  -O4 and -Ofast map to ptxas O3.
-// RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -O0 -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
-// RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -O1 -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT1 %s
-// RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -O2 -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s
-// RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -O3 -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
-// RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -O4 -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
-// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -Ofast -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
 // Generating relocatable device code
-// RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
 
 // With debugging enabled, ptxas should be run with with no ptxas optimizations.
-// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -g -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-noopt-device-debug -O2 -g -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,DBG %s
 
 // --no-cuda-noopt-device-debug overrides --cuda-noopt-device-debug.
-// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-noopt-device-debug \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN:   --no-cuda-noopt-device-debug -O2 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s
 
 // Regular compile without -O.  This should result in us passing -O0 to ptxas.
-// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
 
 // Regular compiles with -Os and -Oz.  For lack of a better option, we map
 // these to ptxas -O3.
-// RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -Os -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s
-// RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -Oz -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s
 
 // Regular compile targeting sm_35.
-// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
 // Separate compilation targeting sm_35.
-// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
 
 // 32-bit compile.
-// RUN: %clang -### -target i386-linux-gnu -c %s 2>&1 \
+// RUN: %clang -### --target=i386-linux-gnu -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35 %s
 // 32-bit compile when generating relocatable device code.
-// RUN: %clang -### -target i386-linux-gnu -fgpu-rdc -c %s 2>&1 \
+// RUN: %clang -### --target=i386-linux-gnu -fgpu-rdc -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s
 
 // Compile with -fintegrated-as.  This should still cause us to invoke ptxas.
-// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
 // Check that we still pass -c when generating relocatable device code.
-// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
 
 // Check -Xcuda-ptxas and -Xcuda-fatbinary
-// RUN: %clang -### -target x86_64-linux-gnu -c -Xcuda-ptxas -foo1 \
+// RUN: %clang -### --target=x86_64-linux-gnu -c -Xcuda-ptxas -foo1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN:   -Xcuda-fatbinary -bar1 -Xcuda-ptxas -foo2 -Xcuda-fatbinary -bar2 %s 2>&1 \
 // RUN: | FileCheck -check-prefixes=CHECK,SM35,PTXAS-EXTRA,FATBINARY-EXTRA %s
 
 // MacOS spot-checks
-// RUN: %clang -### -target x86_64-apple-macosx -O0 -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-apple-macosx -O0 -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
-// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-apple-macosx --cuda-gpu-arch=sm_35 -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
-// RUN: %clang -### -target i386-apple-macosx -c %s 2>&1 \
+// RUN: %clang -### --target=i386-apple-macosx -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35 %s
 
 // Check relocatable device code generation on MacOS.
-// RUN: %clang -### -target x86_64-apple-macosx -O0 -fgpu-rdc -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-apple-macosx -O0 -fgpu-rdc -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
-// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
+// RUN: %clang -### --target=x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
-// RUN: %clang -### -target i386-apple-macosx -fgpu-rdc -c %s 2>&1 \
+// RUN: %clang -### --target=i386-apple-macosx -fgpu-rdc -c %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s
 
 // Check that CLANG forwards the -v flag to PTXAS.
-// RUN: %clang -### -save-temps -no-canonical-prefixes -v %s 2>&1 \
+// RUN: %clang -### -save-temps -v %s 2>&1 \
 // RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefix=CHK-PTXAS-VERBOSE %s
 
diff --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
index 0f2b5de..c971d15 100644
--- a/clang/test/Driver/cuda-flush-denormals-to-zero.cu
+++ b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -2,37 +2,37 @@
 // -fgpu-flush-denormals-to-zero. This should be translated to
 // -fdenormal-fp-math-f32=preserve-sign
 
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
 // Test alias options -f[no-]cuda-flush-denormals-to-zero
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
 // Test explicit argument, with CUDA offload kind
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-gpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-gpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
 // Test explicit argument, with HIP offload kind
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-gpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-gpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fno-gpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fno-gpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
 // Test the default changing with no argument based on the subtarget in HIP mode
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
 // Test no subtarget, which should get the denormal setting of the default gfx803
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 
 // Test multiple offload archs with different defaults.
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=MIXED-DEFAULT-MODE %s
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fgpu-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZX2 %s
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fno-gpu-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=MIXED-DEFAULT-MODE %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell -fgpu-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZX2 %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell -fno-gpu-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
 
 // CPUFTZ-NOT: -fdenormal-fp-math
diff --git a/clang/test/Driver/darwin-max-type-align.c b/clang/test/Driver/darwin-max-type-align.c
index 6532f4a..bce0a7c 100644
--- a/clang/test/Driver/darwin-max-type-align.c
+++ b/clang/test/Driver/darwin-max-type-align.c
@@ -1,15 +1,15 @@
 // Check the -fmax-type-align=N flag
 // rdar://16254558
 //
-// RUN: %clang -no-canonical-prefixes -target x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \
+// RUN: %clang --target=x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \
 // RUN:   FileCheck -check-prefix=TEST0 %s
 // TEST0: -fmax-type-align=16
-// RUN: %clang -no-canonical-prefixes -fmax-type-align=32 -target x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \
+// RUN: %clang -fmax-type-align=32 --target=x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \
 // RUN:   FileCheck -check-prefix=TEST1 %s
 // TEST1: -fmax-type-align=32
-// RUN: %clang -no-canonical-prefixes -fmax-type-align=32 -fno-max-type-align -target x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \
+// RUN: %clang -fmax-type-align=32 -fno-max-type-align --target=x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \
 // RUN:   FileCheck -check-prefix=TEST2 %s
 // TEST2-NOT: -fmax-type-align
-// RUN: %clang -no-canonical-prefixes -fno-max-type-align -target x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \
+// RUN: %clang -fno-max-type-align --target=x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \
 // RUN:   FileCheck -check-prefix=TEST3 %s
 // TEST3-NOT: -fmax-type-align
diff --git a/clang/test/Driver/darwin-sanitizer-ld.c b/clang/test/Driver/darwin-sanitizer-ld.c
index 4519775..ee84cac 100644
--- a/clang/test/Driver/darwin-sanitizer-ld.c
+++ b/clang/test/Driver/darwin-sanitizer-ld.c
@@ -1,7 +1,7 @@
 // Test sanitizer link flags on Darwin.
 
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \
-// RUN:   -stdlib=platform -fsanitize=address %s -o %t.o 2>&1 \
+// RUN: %clang -### --target=x86_64-darwin \
+// RUN:   -stdlib=platform -fsanitize=address %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN %s
 
 // CHECK-ASAN: "{{.*}}ld{{(.exe)?}}"
@@ -11,7 +11,7 @@
 // CHECK-ASAN: "-rpath" "@executable_path"
 // CHECK-ASAN: "-rpath" "{{.*}}lib{{.*}}darwin"
 
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \
+// RUN: %clang -### --target=x86_64-darwin \
 // RUN:   -fPIC -shared -fsanitize=address %s -o %t.so 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-DYN-ASAN %s
 
@@ -21,8 +21,8 @@
 // CHECK-DYN-ASAN: "-rpath" "@executable_path"
 // CHECK-DYN-ASAN: "-rpath" "{{.*}}lib{{.*}}darwin"
 
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \
-// RUN:   -stdlib=platform -fsanitize=undefined %s -o %t.o 2>&1 \
+// RUN: %clang -### --target=x86_64-darwin \
+// RUN:   -stdlib=platform -fsanitize=undefined %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN %s
 
 // CHECK-UBSAN: "{{.*}}ld{{(.exe)?}}"
@@ -32,15 +32,15 @@
 // CHECK-UBSAN: "-rpath" "@executable_path"
 // CHECK-UBSAN: "-rpath" "{{.*}}lib{{.*}}darwin"
 
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \
+// RUN: %clang -### --target=x86_64-darwin \
 // RUN:   -fsanitize=bounds -fsanitize-undefined-trap-on-error \
-// RUN:   %s -o %t.o 2>&1 \
+// RUN:   %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-BOUNDS %s
 
 // CHECK-BOUNDS: "{{.*}}ld{{(.exe)?}}"
 // CHECK-BOUNDS-NOT: libclang_rt.ubsan_osx.a"
 
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \
+// RUN: %clang -### --target=x86_64-darwin \
 // RUN:   -fPIC -shared -fsanitize=undefined %s -o %t.so 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-DYN-UBSAN %s
 
@@ -50,7 +50,7 @@
 // CHECK-DYN-UBSAN: "-rpath" "@executable_path"
 // CHECK-DYN-UBSAN: "-rpath" "{{.*}}lib{{.*}}darwin"
 
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \
+// RUN: %clang -### --target=x86_64-darwin \
 // RUN:   -fsanitize=bounds -fsanitize-undefined-trap-on-error \
 // RUN:   %s -o %t.so -fPIC -shared 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-DYN-BOUNDS %s
@@ -58,9 +58,9 @@
 // CHECK-DYN-BOUNDS: "{{.*}}ld{{(.exe)?}}"
 // CHECK-DYN-BOUNDS-NOT: ubsan_osx
 
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \
+// RUN: %clang -### --target=x86_64-darwin \
 // RUN:   -stdlib=platform -fsanitize=address -mios-simulator-version-min=7.0 \
-// RUN:   %s -o %t.o 2>&1 | FileCheck --check-prefix=CHECK-ASAN-IOSSIM %s
+// RUN:   %s 2>&1 | FileCheck --check-prefix=CHECK-ASAN-IOSSIM %s
 
 // CHECK-ASAN-IOSSIM: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ASAN-IOSSIM-NOT: "-lstdc++"
@@ -69,9 +69,9 @@
 // CHECK-ASAN-IOSSIM: "-rpath" "@executable_path"
 // CHECK-ASAN-IOSSIM: "-rpath" "{{.*}}lib{{.*}}darwin"
 
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \
+// RUN: %clang -### --target=x86_64-darwin \
 // RUN:   -stdlib=platform -fsanitize=address \
-// RUN:   -mtvos-simulator-version-min=8.3.0 %s -o %t.o 2>&1 \
+// RUN:   -mtvos-simulator-version-min=8.3.0 %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-TVOSSIM %s
 
 // CHECK-ASAN-TVOSSIM: "{{.*}}ld{{(.exe)?}}"
@@ -81,9 +81,9 @@
 // CHECK-ASAN-TVOSSIM: "-rpath" "@executable_path"
 // CHECK-ASAN-TVOSSIM: "-rpath" "{{.*}}lib{{.*}}darwin"
 
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \
+// RUN: %clang -### --target=x86_64-darwin \
 // RUN:   -stdlib=platform -fsanitize=address \
-// RUN:   -mwatchos-simulator-version-min=2.0.0 %s -o %t.o 2>&1 \
+// RUN:   -mwatchos-simulator-version-min=2.0.0 %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-WATCHOSSIM %s
 
 // CHECK-ASAN-WATCHOSSIM: "{{.*}}ld{{(.exe)?}}"
@@ -93,10 +93,10 @@
 // CHECK-ASAN-WATCHOSSIM: "-rpath" "@executable_path"
 // CHECK-ASAN-WATCHOSSIM: "-rpath" "{{.*}}lib{{.*}}darwin"
 
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-apple-ios13.1-macabi \
+// RUN: %clang -### --target=x86_64-apple-ios13.1-macabi \
 // RUN:   -stdlib=platform -fsanitize=address \
 // RUN:   -resource-dir %S/Inputs/resource_dir \
-// RUN:   %s -o %t.o 2>&1 | FileCheck --check-prefix=CHECK-ASAN-MACCATALYST %s
+// RUN:   %s 2>&1 | FileCheck --check-prefix=CHECK-ASAN-MACCATALYST %s
 
 // CHECK-ASAN-MACCATALYST: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ASAN-MACCATALYST-NOT: "-lstdc++"
@@ -105,9 +105,9 @@
 // CHECK-ASAN-MACCATALYST: "-rpath" "@executable_path"
 // CHECK-ASAN-MACCATALYST: "-rpath" "{{.*}}lib{{.*}}darwin"
 
-// RUN: %clang -no-canonical-prefixes -### -target armv7-apple-ios  \
+// RUN: %clang -### --target=armv7-apple-ios  \
 // RUN:   -stdlib=platform -fsanitize=address -miphoneos-version-min=7 \
-// RUN:   %s -o %t.o 2>&1 | FileCheck --check-prefix=CHECK-ASAN-IOS %s
+// RUN:   %s 2>&1 | FileCheck --check-prefix=CHECK-ASAN-IOS %s
 
 // CHECK-ASAN-IOS: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ASAN-IOS-NOT: "-lstdc++"
@@ -116,9 +116,9 @@
 // CHECK-ASAN-IOS: "-rpath" "@executable_path"
 // CHECK-ASAN-IOS: "-rpath" "{{.*}}lib{{.*}}darwin"
 
-// RUN: %clang -no-canonical-prefixes -### -target arm64-apple-tvos \
+// RUN: %clang -### --target=arm64-apple-tvos \
 // RUN:   -stdlib=platform -fsanitize=address -mtvos-version-min=8.3 \
-// RUN:   %s -o %t.o 2>&1 | FileCheck --check-prefix=CHECK-ASAN-TVOS %s
+// RUN:   %s 2>&1 | FileCheck --check-prefix=CHECK-ASAN-TVOS %s
 
 // CHECK-ASAN-TVOS: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ASAN-TVOS-NOT: "-lstdc++"
@@ -127,9 +127,9 @@
 // CHECK-ASAN-TVOS: "-rpath" "@executable_path"
 // CHECK-ASAN-TVOS: "-rpath" "{{.*}}lib{{.*}}darwin"
 
-// RUN: %clang -no-canonical-prefixes -### -target armv7k-apple-watchos \
+// RUN: %clang -### --target=armv7k-apple-watchos \
 // RUN:   -stdlib=platform -fsanitize=address -mwatchos-version-min=2.0 \
-// RUN:   %s -o %t.o 2>&1 | FileCheck --check-prefix=CHECK-ASAN-WATCHOS %s
+// RUN:   %s 2>&1 | FileCheck --check-prefix=CHECK-ASAN-WATCHOS %s
 
 // CHECK-ASAN-WATCHOS: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ASAN-WATCHOS-NOT: "-lstdc++"
diff --git a/clang/test/Driver/darwin-xarch.c b/clang/test/Driver/darwin-xarch.c
index 0df17fd..2b9682a2 100644
--- a/clang/test/Driver/darwin-xarch.c
+++ b/clang/test/Driver/darwin-xarch.c
@@ -1,49 +1,49 @@
-// RUN: %clang -target x86_64-apple-darwin10 -### \
+// RUN: %clang --target=x86_64-apple-darwin10 -### \
 // RUN:   -arch i386 -Xarch_i386 -mmacosx-version-min=10.4 \
 // RUN:   -arch x86_64 -Xarch_x86_64 -mmacosx-version-min=10.5 \
 // RUN:   -c %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-COMPILE < %t %s
 //
-// CHECK-COMPILE: clang{{.*}}" "-cc1" "-triple" "i386-apple-macosx10.4.0" 
-// CHECK-COMPILE: clang{{.*}}" "-cc1" "-triple" "x86_64-apple-macosx10.5.0"
+// CHECK-COMPILE: "-cc1" "-triple" "i386-apple-macosx10.4.0" 
+// CHECK-COMPILE: "-cc1" "-triple" "x86_64-apple-macosx10.5.0"
 
-// RUN: %clang -target x86_64-apple-darwin10 -### \
+// RUN: %clang --target=x86_64-apple-darwin10 -### \
 // RUN:   -arch i386 -Xarch_i386 -Wl,-some-linker-arg -filelist X 2> %t
 // RUN: FileCheck --check-prefix=CHECK-LINK < %t %s
 //
 // CHECK-LINK: ld{{.*}} "-arch" "i386"{{.*}} "-some-linker-arg"
 
-// RUN: %clang -target x86_64-apple-darwin10 -### \
+// RUN: %clang --target=x86_64-apple-darwin10 -### \
 // RUN:   -arch armv7 -Xarch_armv7 -Wl,-some-linker-arg -filelist X 2> %t
 // RUN: FileCheck --check-prefix=CHECK-ARMV7-LINK < %t %s
 //
 // CHECK-ARMV7-LINK: ld{{.*}} "-arch" "armv7"{{.*}} "-some-linker-arg"
 
 
-// RUN: %clang -target armv7s-apple-ios7 -### \
+// RUN: %clang --target=armv7s-apple-ios7 -### \
 // RUN:   -arch armv7  -Xarch_armv7  -DARMV7=1 \
 // RUN:   -arch armv7s -Xarch_armv7s -DARMV7S=1 \
 // RUN:   -c %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-ARMV7S < %t %s
 //
-// CHECK-ARMV7S: clang{{.*}}" "-cc1" "-triple" "thumbv7-apple-ios7.0.0"
+// CHECK-ARMV7S: "-cc1" "-triple" "thumbv7-apple-ios7.0.0"
 // CHECK-ARMV7S-NOT:  "-D" "ARMV7S=1"
 // CHECK-ARMV7S-SAME: "-D" "ARMV7=1"
 //
-// CHECK-ARMV7S: clang{{.*}}" "-cc1" "-triple" "thumbv7s-apple-ios7.0.0"
+// CHECK-ARMV7S: "-cc1" "-triple" "thumbv7s-apple-ios7.0.0"
 // CHECK-ARMV7S-NOT:  "-D" "ARMV7=1"
 // CHECK-ARMV7S-SAME: "-D" "ARMV7S=1"
 
-// RUN: %clang -target arm64-apple-ios14 -### \
+// RUN: %clang --target=arm64-apple-ios14 -### \
 // RUN:   -arch arm64  -Xarch_arm64  -DARM64=1 \
 // RUN:   -arch arm64e -Xarch_arm64e -DARM64E=1 \
 // RUN:   -c %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-ARM64 < %t %s
 //
-// CHECK-ARM64: clang{{.*}}" "-cc1" "-triple" "arm64-apple-ios14.0.0"
+// CHECK-ARM64: "-cc1" "-triple" "arm64-apple-ios14.0.0"
 // CHECK-ARM64-NOT:  "-D" "ARM64E=1"
 // CHECK-ARM64-SAME: "-D" "ARM64=1"
 //
-// CHECK-ARM64: clang{{.*}}" "-cc1" "-triple" "arm64e-apple-ios14.0.0"
+// CHECK-ARM64: "-cc1" "-triple" "arm64e-apple-ios14.0.0"
 // CHECK-ARM64-NOT:  "-D" "ARM64=1"
 // CHECK-ARM64-SAME: "-D" "ARM64E=1"
diff --git a/clang/test/Driver/dragonfly.c b/clang/test/Driver/dragonfly.c
index 683f879..860312d 100644
--- a/clang/test/Driver/dragonfly.c
+++ b/clang/test/Driver/dragonfly.c
@@ -1,7 +1,7 @@
-// RUN: %clang -no-canonical-prefixes -target x86_64-pc-dragonfly %s -### 2> %t.log
+// RUN: %clang --target=x86_64-pc-dragonfly -### %s 2> %t.log
 // RUN: FileCheck -input-file %t.log %s
 
-// CHECK: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-dragonfly"
+// CHECK: "-cc1" "-triple" "x86_64-pc-dragonfly"
 // CHECK: ld{{.*}}" "--eh-frame-hdr" "-dynamic-linker" "/usr/libexec/ld-elf.so.{{.*}}" "--hash-style=gnu" "--enable-new-dtags" "-o" "a.out" "{{.*}}crt1.o" "{{.*}}crti.o" "{{.*}}crtbegin.o" "{{.*}}.o" "-L{{.*}}gcc{{.*}}" "-rpath" "{{.*}}gcc{{.*}}" "-lc" "-lgcc" "{{.*}}crtend.o" "{{.*}}crtn.o"
 
 // -r suppresses default -l and crt*.o like -nostdlib.
diff --git a/clang/test/Driver/env.c b/clang/test/Driver/env.c
index 557c2b0..413b04e 100644
--- a/clang/test/Driver/env.c
+++ b/clang/test/Driver/env.c
@@ -5,20 +5,16 @@
 // REQUIRES: shell
 // The PATH variable is heavily used when trying to find a linker.
 // RUN: env -i LC_ALL=C LD_LIBRARY_PATH="$LD_LIBRARY_PATH" \
-// RUN:   %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     --target=i386-unknown-linux \
+// RUN:   %clang %s -### -o %t.o --target=i386-unknown-linux \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:     --rtlib=platform -no-pie \
-// RUN:     --gcc-toolchain="" \
-// RUN:   | FileCheck --check-prefix=CHECK-LD-32 %s
+// RUN:     --gcc-toolchain="" 2>&1 | FileCheck --check-prefix=CHECK-LD-32 %s
 //
 // RUN: env -i LC_ALL=C PATH="" LD_LIBRARY_PATH="$LD_LIBRARY_PATH" \
-// RUN:   %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     --target=i386-unknown-linux \
+// RUN:   %clang %s -### -o %t.o --target=i386-unknown-linux \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:     --rtlib=platform -no-pie \
-// RUN:     --gcc-toolchain="" \
-// RUN:   | FileCheck --check-prefix=CHECK-LD-32 %s
+// RUN:     --gcc-toolchain="" 2>&1 | FileCheck --check-prefix=CHECK-LD-32 %s
 //
 // CHECK-LD-32-NOT: warning:
 // CHECK-LD-32: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]"
diff --git a/clang/test/Driver/fat_archive_amdgpu.cpp b/clang/test/Driver/fat_archive_amdgpu.cpp
index 1013d8c..711854c 100644
--- a/clang/test/Driver/fat_archive_amdgpu.cpp
+++ b/clang/test/Driver/fat_archive_amdgpu.cpp
@@ -7,7 +7,7 @@
 // Given a FatArchive, clang-offload-bundler should be called to create a
 // device specific archive, which should be passed to llvm-link.
 // RUN: %clang -O2 -### -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 | FileCheck %s
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "[[GPU:gfx[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.bc]]" "-x" "c++"{{.*}}.cpp
+// CHECK: "-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "[[GPU:gfx[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.bc]]" "-x" "c++"{{.*}}.cpp
 // CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-amdgcn-amd-amdhsa-[[GPU]]" "-output=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
 // CHECK: llvm-link{{.*}}"[[HOSTBC]]" "[[DEVICESPECIFICARCHIVE]]" "-o" "{{.*}}-[[GPU]]-linked-{{.*}}.bc"
 // expected-no-diagnostics
diff --git a/clang/test/Driver/fat_archive_nvptx.cpp b/clang/test/Driver/fat_archive_nvptx.cpp
index aa80cad..22903c0 100644
--- a/clang/test/Driver/fat_archive_nvptx.cpp
+++ b/clang/test/Driver/fat_archive_nvptx.cpp
@@ -6,7 +6,7 @@
 // Given a FatArchive, clang-offload-bundler should be called to create a
 // device specific archive, which should be passed to clang-nvlink-wrapper.
 // RUN: %clang -O2 -### -fopenmp -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 | FileCheck %s
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "[[GPU:sm_[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.s]]" "-x" "c++"{{.*}}.cpp
+// CHECK: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "[[GPU:sm_[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.s]]" "-x" "c++"{{.*}}.cpp
 // CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-nvptx64-nvidia-cuda-[[GPU]]" "-output=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
 // CHECK: clang-nvlink-wrapper{{.*}}"-o" "{{.*}}.out" "-arch" "[[GPU]]" "{{.*}}[[DEVICESPECIFICARCHIVE]]"
 // RUN: not %clang -fopenmp -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda %s %S/Inputs/openmp_static_device_link/empty.o --libomptarget-nvptx-bc-path=%S/Inputs/openmp_static_device_link/lib.bc 2>&1 | FileCheck %s --check-prefix=EMPTY
diff --git a/clang/test/Driver/freebsd.c b/clang/test/Driver/freebsd.c
index dc0b0bb..6c2d0c6 100644
--- a/clang/test/Driver/freebsd.c
+++ b/clang/test/Driver/freebsd.c
@@ -1,29 +1,29 @@
-// RUN: %clang -no-canonical-prefixes \
-// RUN:   -target aarch64-pc-freebsd11 %s                              \
+// RUN: %clang \
+// RUN:   --target=aarch64-pc-freebsd11 %s                              \
 // RUN:   --sysroot=%S/Inputs/basic_freebsd64_tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-ARM64 %s
 // CHECK-ARM64: "-cc1" "-triple" "aarch64-pc-freebsd11"
 // CHECK-ARM64: ld{{.*}}" "--sysroot=[[SYSROOT:[^"]+]]"
 // CHECK-ARM64: "--eh-frame-hdr" "-dynamic-linker" "{{.*}}ld-elf{{.*}}" "-o" "a.out" "{{.*}}crt1.o" "{{.*}}crti.o" "{{.*}}crtbegin.o" "-L[[SYSROOT]]/usr/lib" "{{.*}}.o" "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed" "-lc" "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed" "{{.*}}crtend.o" "{{.*}}crtn.o"
 //
-// RUN: %clang -no-canonical-prefixes \
-// RUN:   -target powerpc-pc-freebsd8 %s    \
+// RUN: %clang \
+// RUN:   --target=powerpc-pc-freebsd8 %s    \
 // RUN:   --sysroot=%S/Inputs/basic_freebsd_tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-PPC %s
 // CHECK-PPC: "-cc1" "-triple" "powerpc-pc-freebsd8"
 // CHECK-PPC: ld{{.*}}" "--sysroot=[[SYSROOT:[^"]+]]"
 // CHECK-PPC: "--eh-frame-hdr" "-dynamic-linker" "{{.*}}ld-elf{{.*}}" "-o" "a.out" "{{.*}}crt1.o" "{{.*}}crti.o" "{{.*}}crtbegin.o" "-L[[SYSROOT]]/usr/lib" "{{.*}}.o" "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed" "-lc" "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed" "{{.*}}crtend.o" "{{.*}}crtn.o"
 //
-// RUN: %clang -no-canonical-prefixes \
-// RUN:   -target powerpc64-pc-freebsd8 %s                              \
+// RUN: %clang \
+// RUN:   --target=powerpc64-pc-freebsd8 %s                              \
 // RUN:   --sysroot=%S/Inputs/basic_freebsd64_tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-PPC64 %s
 // CHECK-PPC64: "-cc1" "-triple" "powerpc64-pc-freebsd8"
 // CHECK-PPC64: ld{{.*}}" "--sysroot=[[SYSROOT:[^"]+]]"
 // CHECK-PPC64: "--eh-frame-hdr" "-dynamic-linker" "{{.*}}ld-elf{{.*}}" "-o" "a.out" "{{.*}}crt1.o" "{{.*}}crti.o" "{{.*}}crtbegin.o" "-L[[SYSROOT]]/usr/lib" "{{.*}}.o" "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed" "-lc" "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed" "{{.*}}crtend.o" "{{.*}}crtn.o"
 
-// RUN: %clang -no-canonical-prefixes \
-// RUN:   -target powerpc64le-unknown-freebsd13 %s \
+// RUN: %clang \
+// RUN:   --target=powerpc64le-unknown-freebsd13 %s \
 // RUN:   --sysroot=%S/Inputs/basic_freebsd64_tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-PPC64LE %s
 // CHECK-PPC64LE: "-cc1" "-triple" "powerpc64le-unknown-freebsd13"
@@ -33,61 +33,61 @@
 //
 // Check that -m32 properly adjusts the toolchain flags.
 //
-// RUN: %clang -no-canonical-prefixes -target x86_64-pc-freebsd8 -m32 %s \
+// RUN: %clang --target=x86_64-pc-freebsd8 -m32 %s \
 // RUN:   --sysroot=%S/Inputs/multiarch_freebsd64_tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-LIB32 %s
 // CHECK-LIB32: "-cc1" "-triple" "i386-pc-freebsd8"
 // CHECK-LIB32: ld{{.*}}" {{.*}} "-m" "elf_i386_fbsd"
 //
-// RUN: %clang -target x86_64-pc-freebsd8 -m32 %s 2>&1 \
+// RUN: %clang --target=x86_64-pc-freebsd8 -m32 %s 2>&1 \
 // RUN:   --sysroot=%S/Inputs/multiarch_freebsd64_tree -print-search-dirs 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-LIB32PATHS %s
 // CHECK-LIB32PATHS: libraries: ={{.*:?}}/usr/lib32
 //
 // Check that O32 MIPS uses /usr/lib32 on a 64-bit tree.
 //
-// RUN: %clang -target mips-freebsd12 %s \
+// RUN: %clang --target=mips-freebsd12 %s \
 // RUN:   --sysroot=%S/Inputs/multiarch_freebsd64_tree -print-search-dirs 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-LIB32PATHS %s
 //
 // Check that MIPS passes the correct linker emulation.
 //
-// RUN: %clang -target mips-freebsd %s -### %s 2>&1 \
+// RUN: %clang --target=mips-freebsd -### %s %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS-LD %s
 // CHECK-MIPS-LD: ld{{.*}}" {{.*}} "-m" "elf32btsmip_fbsd"
-// RUN: %clang -target mipsel-freebsd %s -### %s 2>&1 \
+// RUN: %clang --target=mipsel-freebsd -### %s %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPSEL-LD %s
 // CHECK-MIPSEL-LD: ld{{.*}}" {{.*}} "-m" "elf32ltsmip_fbsd"
-// RUN: %clang -target mips64-freebsd %s -### %s 2>&1 \
+// RUN: %clang --target=mips64-freebsd -### %s %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64-LD %s
 // CHECK-MIPS64-LD: ld{{.*}}" {{.*}} "-m" "elf64btsmip_fbsd"
-// RUN: %clang -target mips64el-freebsd %s -### %s 2>&1 \
+// RUN: %clang --target=mips64el-freebsd -### %s %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64EL-LD %s
 // CHECK-MIPS64EL-LD: ld{{.*}}" {{.*}} "-m" "elf64ltsmip_fbsd"
-// RUN: %clang -target mips64-freebsd -mabi=n32 %s -### %s 2>&1 \
+// RUN: %clang --target=mips64-freebsd -mabi=n32 -### %s %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPSN32-LD %s
 // CHECK-MIPSN32-LD: ld{{.*}}" {{.*}} "-m" "elf32btsmipn32_fbsd"
-// RUN: %clang -target mips64el-freebsd -mabi=n32 %s -### %s 2>&1 \
+// RUN: %clang --target=mips64el-freebsd -mabi=n32 -### %s %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPSN32EL-LD %s
 // CHECK-MIPSN32EL-LD: ld{{.*}}" {{.*}} "-m" "elf32ltsmipn32_fbsd"
 //
 // Check that RISC-V passes the correct linker emulation.
 //
-// RUN: %clang -target riscv32-freebsd %s -### %s 2>&1 \
+// RUN: %clang --target=riscv32-freebsd -### %s %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-RV32I-LD %s
 // CHECK-RV32I-LD: ld{{.*}}" {{.*}} "-m" "elf32lriscv"
-// RUN: %clang -target riscv64-freebsd %s -### %s 2>&1 \
+// RUN: %clang --target=riscv64-freebsd -### %s %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-RV64I-LD %s
 // CHECK-RV64I-LD: ld{{.*}}" {{.*}} "-m" "elf64lriscv"
 //
 // Check that the new linker flags are passed to FreeBSD
-// RUN: %clang -no-canonical-prefixes -target x86_64-pc-freebsd8 -m32 %s \
+// RUN: %clang --target=x86_64-pc-freebsd8 -m32 %s \
 // RUN:   --sysroot=%S/Inputs/multiarch_freebsd64_tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-LDFLAGS8 %s
-// RUN: %clang -no-canonical-prefixes -target x86_64-pc-freebsd9 -m32 %s \
+// RUN: %clang --target=x86_64-pc-freebsd9 -m32 %s \
 // RUN:   --sysroot=%S/Inputs/multiarch_freebsd64_tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-LDFLAGS9 %s
-// RUN: %clang -no-canonical-prefixes -target x86_64-pc-freebsd10.0 -m32 %s \
+// RUN: %clang --target=x86_64-pc-freebsd10.0 -m32 %s \
 // RUN:   --sysroot=%S/Inputs/multiarch_freebsd64_tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-LDFLAGS9 %s
 // CHECK-LDFLAGS8-NOT: --hash-style=both
@@ -98,111 +98,111 @@
 // Check that we do not pass --hash-style=gnu and --hash-style=both to linker
 // and provide correct path to the dynamic linker for MIPS platforms.
 // Also verify that we tell the assembler to target the right ISA and ABI.
-// RUN: %clang %s -### -o %t.o 2>&1 \
-// RUN:     -target mips-unknown-freebsd10.0 \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=mips-unknown-freebsd10.0 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS %s
 // CHECK-MIPS: "{{[^" ]*}}ld{{[^" ]*}}"
 // CHECK-MIPS: "-dynamic-linker" "{{.*}}/libexec/ld-elf.so.1"
 // CHECK-MIPS-NOT: "--hash-style={{gnu|both}}"
-// RUN: %clang %s -### -o %t.o 2>&1 \
-// RUN:     -target mipsel-unknown-freebsd10.0 \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=mipsel-unknown-freebsd10.0 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPSEL %s
 // CHECK-MIPSEL: "{{[^" ]*}}ld{{[^" ]*}}"
 // CHECK-MIPSEL: "-dynamic-linker" "{{.*}}/libexec/ld-elf.so.1"
 // CHECK-MIPSEL-NOT: "--hash-style={{gnu|both}}"
-// RUN: %clang %s -### 2>&1 \
-// RUN:     -target mips64-unknown-freebsd10.0 \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=mips64-unknown-freebsd10.0 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64 %s
 // CHECK-MIPS64: "{{[^" ]*}}ld{{[^" ]*}}"
 // CHECK-MIPS64: "-dynamic-linker" "{{.*}}/libexec/ld-elf.so.1"
 // CHECK-MIPS64-NOT: "--hash-style={{gnu|both}}"
-// RUN: %clang %s -### 2>&1 \
-// RUN:     -target mips64el-unknown-freebsd10.0 \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=mips64el-unknown-freebsd10.0 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64EL %s
 // CHECK-MIPS64EL: "{{[^" ]*}}ld{{[^" ]*}}"
 // CHECK-MIPS64EL: "-dynamic-linker" "{{.*}}/libexec/ld-elf.so.1"
 // CHECK-MIPS64EL-NOT: "--hash-style={{gnu|both}}"
 
-// RUN: %clang -no-canonical-prefixes -target x86_64-pc-freebsd8 -static %s \
+// RUN: %clang --target=x86_64-pc-freebsd8 -static %s \
 // RUN:   --sysroot=%S/Inputs/multiarch_freebsd64_tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-STATIC %s
 // CHECK-STATIC: ld{{.*}}" "--eh-frame-hdr" "-Bstatic"
 // CHECK-STATIC: crt1.o
 // CHECK-STATIC: crtbeginT.o
 
-// RUN: %clang -no-canonical-prefixes -target x86_64-pc-freebsd8 -shared %s \
+// RUN: %clang --target=x86_64-pc-freebsd8 -shared %s \
 // RUN:   --sysroot=%S/Inputs/multiarch_freebsd64_tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-SHARED %s
 // CHECK-SHARED: crti.o
 // CHECK-SHARED: crtbeginS.o
 
-// RUN: %clang -no-canonical-prefixes -target x86_64-pc-freebsd8 -pie %s \
+// RUN: %clang --target=x86_64-pc-freebsd8 -pie %s \
 // RUN:   --sysroot=%S/Inputs/multiarch_freebsd64_tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-PIE %s
 // CHECK-PIE: pie
 // CHECK-PIE: Scrt1.o
 // CHECK-PIE: crtbeginS.o
 
-// RUN: %clang -no-canonical-prefixes -target x86_64-pc-freebsd8 %s \
+// RUN: %clang --target=x86_64-pc-freebsd8 %s \
 // RUN:   --sysroot=%S/Inputs/multiarch_freebsd64_tree -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NORMAL %s
 // CHECK-NORMAL: crt1.o
 // CHECK-NORMAL: crtbegin.o
 
-// RUN: %clang %s -### -target arm-unknown-freebsd10.0 -no-integrated-as 2>&1 \
+// RUN: %clang -### %s --target=arm-unknown-freebsd10.0 -no-integrated-as 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-ARM %s
 // CHECK-ARM: "-cc1"{{.*}}" "-exception-model=sjlj"
 // CHECK-ARM: as{{.*}}" "-mfpu=softvfp"{{.*}}"-matpcs"
 // CHECK-ARM-EABI-NOT: as{{.*}}" "-mfpu=vfp"
 
-// RUN: %clang %s -### -target arm-gnueabi-freebsd10.0 -no-integrated-as 2>&1 \
+// RUN: %clang -### %s --target=arm-gnueabi-freebsd10.0 -no-integrated-as 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-ARM-EABI %s
 // CHECK-ARM-EABI-NOT: "-cc1"{{.*}}" "-exception-model=sjlj"
 // CHECK-ARM-EABI: as{{.*}}" "-mfpu=softvfp" "-meabi=5"
 // CHECK-ARM-EABI-NOT: as{{.*}}" "-mfpu=vfp"
 // CHECK-ARM-EABI-NOT: as{{.*}}" "-matpcs"
 
-// RUN: %clang %s -### -target arm-gnueabihf-freebsd10.0 -no-integrated-as 2>&1 \
+// RUN: %clang -### %s --target=arm-gnueabihf-freebsd10.0 -no-integrated-as 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-ARM-EABIHF %s
 // CHECK-ARM-EABIHF-NOT: "-cc1"{{.*}}" "-exception-model=sjlj"
 // CHECK-ARM-EABIHF: as{{.*}}" "-mfpu=vfp" "-meabi=5"
 // CHECK-ARM-EABIHF-NOT: as{{.*}}" "-mfpu=softvfp"
 // CHECK-ARM-EABIHF-NOT: as{{.*}}" "-matpcs"
 
-// RUN: %clang -target sparc-unknown-freebsd8 %s -### -fpic -no-integrated-as 2>&1 \
+// RUN: %clang --target=sparc-unknown-freebsd8 -### %s -fpic -no-integrated-as 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-SPARC-PIE %s
 // CHECK-SPARC-PIE: as{{.*}}" "-KPIC
 
-// RUN: %clang -mcpu=ultrasparc -target sparc64-unknown-freebsd8 %s -### -no-integrated-as 2>&1 \
+// RUN: %clang -mcpu=ultrasparc --target=sparc64-unknown-freebsd8 -### %s -no-integrated-as 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-SPARC-CPU %s
 // CHECK-SPARC-CPU: cc1{{.*}}" "-target-cpu" "ultrasparc"
 // CHECK-SPARC-CPU: as{{.*}}" "-Av9a
 
 // Check that -G flags are passed to the linker for mips
-// RUN: %clang -target mips-unknown-freebsd %s -### -G0 2>&1 \
+// RUN: %clang --target=mips-unknown-freebsd -### %s -G0 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS-G %s
 // CHECK-MIPS-G: ld{{.*}}" "-G0"
 
 // Check CPU type for MIPS
-// RUN: %clang -target mips-unknown-freebsd -### -c %s 2>&1 \
+// RUN: %clang --target=mips-unknown-freebsd -### -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-MIPS-CPU %s
-// RUN: %clang -target mipsel-unknown-freebsd -### -c %s 2>&1 \
+// RUN: %clang --target=mipsel-unknown-freebsd -### -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-MIPS-CPU %s
 // CHECK-MIPS-CPU: "-target-cpu" "mips2"
 
 // Check CPU type for MIPS64
-// RUN: %clang -target mips64-unknown-freebsd -### -c %s 2>&1 \
+// RUN: %clang --target=mips64-unknown-freebsd -### -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-MIPS64-CPU %s
-// RUN: %clang -target mips64el-unknown-freebsd -### -c %s 2>&1 \
+// RUN: %clang --target=mips64el-unknown-freebsd -### -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-MIPS64-CPU %s
 // CHECK-MIPS64-CPU: "-target-cpu" "mips3"
 
 // Check that the integrated assembler is enabled for SPARC64
-// RUN: %clang -target sparc64-unknown-freebsd -### -c %s 2>&1 \
+// RUN: %clang --target=sparc64-unknown-freebsd -### -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-IAS %s
 // CHECK-IAS-NOT: "-no-integrated-as"
 
-// RUN: %clang -target ppc64-unknown-freebsd13.0 -### -S %s 2>&1 | \
+// RUN: %clang --target=ppc64-unknown-freebsd13.0 -### -S %s 2>&1 | \
 // RUN: FileCheck -check-prefix=PPC64-MUNWIND %s
 // PPC64-MUNWIND: "-funwind-tables=2"
 
diff --git a/clang/test/Driver/function-sections.c b/clang/test/Driver/function-sections.c
index ba065b1..bfb6cc6 100644
--- a/clang/test/Driver/function-sections.c
+++ b/clang/test/Driver/function-sections.c
@@ -7,68 +7,68 @@
 // CHECK-US-NOT: -fno-unique-section-names
 // CHECK-NOUS: -fno-unique-section-names
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:   | FileCheck --check-prefix=CHECK-NOFS --check-prefix=CHECK-NODS %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -ffunction-sections \
 // RUN:   | FileCheck --check-prefix=CHECK-FS %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -fno-function-sections \
 // RUN:   | FileCheck --check-prefix=CHECK-NOFS %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -ffunction-sections -fno-function-sections \
 // RUN:   | FileCheck --check-prefix=CHECK-NOFS %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -fno-function-sections -ffunction-sections \
 // RUN:   | FileCheck --check-prefix=CHECK-FS %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -ffunction-sections -fno-function-sections -ffunction-sections \
 // RUN:   | FileCheck --check-prefix=CHECK-FS %s
 
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -fdata-sections \
 // RUN:   | FileCheck --check-prefix=CHECK-DS %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -fno-data-sections \
 // RUN:   | FileCheck --check-prefix=CHECK-NODS %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -fdata-sections -fno-data-sections \
 // RUN:   | FileCheck --check-prefix=CHECK-NODS %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -fno-data-sections -fdata-sections \
 // RUN:   | FileCheck --check-prefix=CHECK-DS %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1       \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -fdata-sections -fno-data-sections -fdata-sections \
 // RUN:   | FileCheck --check-prefix=CHECK-DS %s
 
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1        \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1        \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -funique-section-names \
 // RUN:   | FileCheck --check-prefix=CHECK-US %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1        \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -### %s -fsyntax-only 2>&1        \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -fno-unique-section-names \
 // RUN:   | FileCheck --check-prefix=CHECK-NOUS %s
diff --git a/clang/test/Driver/gfortran.f90 b/clang/test/Driver/gfortran.f90
index 6f972cc..c985428 100644
--- a/clang/test/Driver/gfortran.f90
+++ b/clang/test/Driver/gfortran.f90
@@ -3,7 +3,7 @@
 ! being supported by gfortran to GCC when falling back to GCC for
 ! a fortran input file.
 !
-! RUN: %clang -no-canonical-prefixes -target i386-linux -### %s -o %t 2>&1 \
+! RUN: %clang --target=i386-linux -### %s 2>&1 \
 ! RUN:     -Aquestion=answer \
 ! RUN:     -A-question=answer \
 ! RUN:     -C \
@@ -244,7 +244,7 @@
 !
 ! PR22234: Ensure that -fsyntax-only doesn't complain about output types and
 !          passes along correctly.
-! RUN: %clang -no-canonical-prefixes -target i386-linux -fsyntax-only -### %s -o %t 2>&1 | \
+! RUN: %clang --target=i386-linux -fsyntax-only -### %s 2>&1 | \
 ! grep for error message and command-line
 ! RUN: grep -e error: -e -fsyntax-only | FileCheck %s --check-prefix=CHECK-PR22234
 !
@@ -254,6 +254,6 @@
 ! Regression test for the bug introduced with PR22234 fix.
 ! Make sure -fsyntax-only is not passed to gfortran during normal compilation.
 !
-! RUN: %clang -no-canonical-prefixes -target i386-linux -### %s -o %t 2>&1 \
+! RUN: %clang --target=i386-linux -### %s 2>&1 \
 ! RUN: | FileCheck %s --check-prefix=CHECK-PR22234-R
 ! CHECK-PR22234-R-NOT: "-fsyntax-only"
diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip
index 4083cf4..c995fa6 100644
--- a/clang/test/Driver/hip-options.hip
+++ b/clang/test/Driver/hip-options.hip
@@ -5,23 +5,23 @@
 
 // Check that there are commands for both host- and device-side compilations.
 //
-// CHECK: clang{{.*}}" "-cc1" {{.*}} "-fcuda-is-device"
+// CHECK: "-cc1" {{.*}} "-fcuda-is-device"
 // CHECK-SAME: "--gpu-max-threads-per-block=1024"
 
 // RUN: %clang -### -nogpuinc -nogpulib -fgpu-allow-device-init \
 // RUN:   %s 2>&1 | FileCheck -check-prefix=DEVINIT %s
-// DEVINIT: clang{{.*}}" "-cc1" {{.*}}"-fgpu-allow-device-init"
-// DEVINIT: clang{{.*}}" "-cc1" {{.*}}"-fgpu-allow-device-init"
+// DEVINIT: "-cc1" {{.*}}"-fgpu-allow-device-init"
+// DEVINIT: "-cc1" {{.*}}"-fgpu-allow-device-init"
 
 // Check -fgpu-default-stream=per-thread.
 // RUN: %clang -### -nogpuinc -nogpulib -fgpu-default-stream=per-thread \
 // RUN:   %s -save-temps 2>&1 | FileCheck -check-prefix=PTH %s
-// PTH: clang{{.*}}" "-cc1" {{.*}}"-E" {{.*}}"-fgpu-default-stream=per-thread"
-// PTH: clang{{.*}}" "-cc1" {{.*}}"-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
-// PTH: clang{{.*}}" "-cc1" {{.*}}"-E" {{.*}}"-fgpu-default-stream=per-thread"
-// PTH: clang{{.*}}" "-cc1" {{.*}}"-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
+// PTH: "-cc1"{{.*}} "-E" {{.*}}"-fgpu-default-stream=per-thread"
+// PTH: "-cc1"{{.*}} "-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
+// PTH: "-cc1"{{.*}} "-E" {{.*}}"-fgpu-default-stream=per-thread"
+// PTH: "-cc1"{{.*}} "-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
 
-// RUN: %clang -### -x hip -target x86_64-pc-windows-msvc -fms-extensions \
+// RUN: %clang -### -x hip --target=x86_64-pc-windows-msvc -fms-extensions \
 // RUN:   -mllvm -amdgpu-early-inline-all=true  %s 2>&1 | \
 // RUN:   FileCheck -check-prefix=MLLVM %s
 // MLLVM-NOT: "-mllvm"{{.*}}"-amdgpu-early-inline-all=true"{{.*}}"-mllvm"{{.*}}"-amdgpu-early-inline-all=true"
@@ -29,14 +29,14 @@
 // RUN: %clang -### -Xarch_device -g -nogpulib --cuda-gpu-arch=gfx900 \
 // RUN:   -Xarch_device -fcf-protection=branch \
 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=DEV %s
-// DEV: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
-// DEV: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
+// DEV: "-cc1"{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
+// DEV: "-cc1"{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
 // DEV-NOT: clang{{.*}} {{.*}} "-debug-info-kind={{.*}}"
 
 // RUN: %clang -### -Xarch_host -g -nogpulib --cuda-gpu-arch=gfx900 \
 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=HOST %s
-// HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
-// HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
+// HOST-NOT: "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
+// HOST-NOT: "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
 // HOST: clang{{.*}} "-debug-info-kind={{.*}}"
 
 // RUN: %clang -### -nogpuinc -nogpulib -munsafe-fp-atomics \
@@ -47,43 +47,43 @@
 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=DEFAULT-UNSAFE-FP-ATOMICS %s
 // DEFAULT-UNSAFE-FP-ATOMICS-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-munsafe-fp-atomics"
 
-// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fgpu-exclude-wrong-side-overloads \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fgpu-exclude-wrong-side-overloads \
 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=FIX-OVERLOAD %s
-// FIX-OVERLOAD: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fgpu-exclude-wrong-side-overloads" "-fgpu-defer-diag"
-// FIX-OVERLOAD: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fgpu-exclude-wrong-side-overloads" "-fgpu-defer-diag"
+// FIX-OVERLOAD: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fgpu-exclude-wrong-side-overloads" "-fgpu-defer-diag"
+// FIX-OVERLOAD: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fgpu-exclude-wrong-side-overloads" "-fgpu-defer-diag"
 
 // Check -mconstructor-aliases is not passed to device compilation.
 
-// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=CTA %s
-// CTA: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-mconstructor-aliases"
-// CTA-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mconstructor-aliases"
+// CTA: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-mconstructor-aliases"
+// CTA-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mconstructor-aliases"
 
-// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 // RUN:   --offload-arch=gfx906 -fgpu-inline-threshold=1000 %s 2>&1 | FileCheck -check-prefix=THRESH %s
-// THRESH: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mllvm" "-inline-threshold=1000"
-// THRESH-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-inline-threshold=1000"
+// THRESH: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mllvm" "-inline-threshold=1000"
+// THRESH-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-inline-threshold=1000"
 
 // Check -foffload-lto=thin translated correctly.
 
-// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 // RUN:   --cuda-gpu-arch=gfx906 -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=HIPTHINLTO %s
 
-// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 // RUN:   --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=HIPTHINLTO %s
 
 // Ensure we don't error about -fwhole-program-vtables for the non-device offload compile.
 // HIPTHINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
-// HIPTHINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
-// HIPTHINLTO: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" {{.*}} "-fwhole-program-vtables"
-// HIPTHINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
+// HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
+// HIPTHINLTO: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" {{.*}} "-fwhole-program-vtables"
+// HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
 // HIPTHINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all"
 
 // Check that -flto=thin is handled correctly, particularly with -fwhole-program-vtables.
 //
-// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 // RUN:   --cuda-gpu-arch=gfx906 -flto=thin -fwhole-program-vtables %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=THINLTO %s
 
@@ -91,18 +91,18 @@
 // drop -fwhole-program-vtables for the device offload compile and pass it through for the
 // non-device offload compile along with -flto=thin.
 // THINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
-// THINLTO-NOT: clang{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"
-// THINLTO: clang{{.*}}" "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto=thin" {{.*}} "-fwhole-program-vtables"
-// THINLTO-NOT: clang{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"
+// THINLTO-NOT: "-cc1"{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"
+// THINLTO: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto=thin" {{.*}} "-fwhole-program-vtables"
+// THINLTO-NOT: "-cc1"{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"
 
 // Check -fopenmp is allowed with HIP but -fopenmp-targets= is not allowed.
 
-// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 // RUN:   --offload-arch=gfx906 -fopenmp %s 2>&1 | FileCheck -check-prefix=OMP %s
-// OMP-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fopenmp"
-// OMP: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fopenmp"
+// OMP-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fopenmp"
+// OMP: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fopenmp"
 
-// RUN: not %clang -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN: not %clang --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 // RUN:   --offload-arch=gfx906 -fopenmp -fopenmp-targets=amdgcn %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=OMPTGT %s
 // OMPTGT: unsupported option '-fopenmp-targets=' for language mode 'HIP'
diff --git a/clang/test/Driver/hip-syntax-only.hip b/clang/test/Driver/hip-syntax-only.hip
index dbaef70..71bc46f 100644
--- a/clang/test/Driver/hip-syntax-only.hip
+++ b/clang/test/Driver/hip-syntax-only.hip
@@ -1,10 +1,10 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: %clang -### -nogpulib -target x86_64 -fsyntax-only %s 2>&1 | FileCheck %s
+// RUN: %clang -### -nogpulib --target=x86_64 -fsyntax-only %s 2>&1 | FileCheck %s
 
 // Check that there are commands for both host- and device-side compilations.
 //
-// CHECK-DAG: clang{{.*}}" "-cc1" {{.*}} "-fcuda-is-device"
-// CHECK-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64"
+// CHECK-DAG: "-cc1" {{.*}} "-fcuda-is-device"
+// CHECK-DAG: "-cc1" "-triple" "x86_64"
 // CHECK-NOT: clang-offload-bundler"
diff --git a/clang/test/Driver/instrprof-ld.c b/clang/test/Driver/instrprof-ld.c
index 27a89d1..4eec7ae 100644
--- a/clang/test/Driver/instrprof-ld.c
+++ b/clang/test/Driver/instrprof-ld.c
@@ -1,7 +1,7 @@
 // Test instrumented profiling ld flags.
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fprofile-instr-generate -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fprofile-instr-generate -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LINUX-I386 %s
@@ -9,8 +9,8 @@
 // CHECK-LINUX-I386: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-LINUX-I386: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}linux{{/|\\\\}}libclang_rt.profile-i386.a" {{.*}} "-lc"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fprofile-instr-generate -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fprofile-instr-generate -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LINUX-X86-64 %s
@@ -18,8 +18,8 @@
 // CHECK-LINUX-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-LINUX-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}linux{{/|\\\\}}libclang_rt.profile-x86_64.a" {{.*}} "-lc"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fprofile-instr-generate -nostdlib -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fprofile-instr-generate -nostdlib -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LINUX-NOSTDLIB-X86-64 %s
@@ -27,8 +27,8 @@
 // CHECK-LINUX-NOSTDLIB-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-LINUX-NOSTDLIB-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}linux{{/|\\\\}}libclang_rt.profile-x86_64.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-freebsd -fprofile-instr-generate -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-freebsd -fprofile-instr-generate -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_freebsd64_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-FREEBSD-X86-64 %s
@@ -36,9 +36,9 @@
 // CHECK-FREEBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-FREEBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}freebsd{{/|\\\\}}libclang_rt.profile-x86_64.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     -shared \
-// RUN:     -target i386-unknown-linux -fprofile-instr-generate -fuse-ld=ld \
+// RUN:     --target=i386-unknown-linux -fprofile-instr-generate -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LINUX-I386-SHARED %s
@@ -46,9 +46,9 @@
 // CHECK-LINUX-I386-SHARED: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-LINUX-I386-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}linux{{/|\\\\}}libclang_rt.profile-i386.a" {{.*}} "-lc"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     -shared \
-// RUN:     -target x86_64-unknown-linux -fprofile-instr-generate -fuse-ld=ld \
+// RUN:     --target=x86_64-unknown-linux -fprofile-instr-generate -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LINUX-X86-64-SHARED %s
@@ -56,9 +56,9 @@
 // CHECK-LINUX-X86-64-SHARED: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-LINUX-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}linux{{/|\\\\}}libclang_rt.profile-x86_64.a" {{.*}} "-lc"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     -shared \
-// RUN:     -target x86_64-unknown-freebsd -fprofile-instr-generate -fuse-ld=ld \
+// RUN:     --target=x86_64-unknown-freebsd -fprofile-instr-generate -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_freebsd64_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-FREEBSD-X86-64-SHARED %s
@@ -66,64 +66,64 @@
 // CHECK-FREEBSD-X86-64-SHARED: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-FREEBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}freebsd{{/|\\\\}}libclang_rt.profile-x86_64.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-apple-darwin14 -fprofile-instr-generate -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-apple-darwin14 -fprofile-instr-generate -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=CHECK-DARWIN-X86-64 %s
 //
 // CHECK-DARWIN-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-DARWIN-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}darwin{{/|\\\\}}libclang_rt.profile_osx.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-apple-darwin14 -fprofile-instr-generate -nostdlib -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-apple-darwin14 -fprofile-instr-generate -nostdlib -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=CHECK-DARWIN-NOSTDLIB-X86-64 %s
 //
 // CHECK-DARWIN-NOSTDLIB-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-DARWIN-NOSTDLIB-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}darwin{{/|\\\\}}libclang_rt.profile_osx.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target arm64-apple-ios -fprofile-instr-generate -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=arm64-apple-ios -fprofile-instr-generate -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=CHECK-DARWIN-ARM64 %s
 //
 // CHECK-DARWIN-ARM64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-DARWIN-ARM64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}darwin{{/|\\\\}}libclang_rt.profile_ios.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target armv7-apple-darwin -mtvos-version-min=8.3 -fprofile-instr-generate -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=armv7-apple-darwin -mtvos-version-min=8.3 -fprofile-instr-generate -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=CHECK-TVOS-ARMV7 %s
 //
 // CHECK-TVOS-ARMV7: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-TVOS-ARMV7: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}darwin{{/|\\\\}}libclang_rt.profile_tvos.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target armv7s-apple-darwin10 -mwatchos-version-min=2.0 -arch armv7k -fprofile-instr-generate -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=armv7s-apple-darwin10 -mwatchos-version-min=2.0 -arch armv7k -fprofile-instr-generate -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=CHECK-WATCHOS-ARMV7 %s
 //
 // CHECK-WATCHOS-ARMV7: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-WATCHOS-ARMV7: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}darwin{{/|\\\\}}libclang_rt.profile_watchos.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-pc-win32 -fprofile-instr-generate \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=i386-pc-win32 -fprofile-instr-generate \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=CHECK-WINDOWS-I386 %s
 //
 // CHECK-WINDOWS-I386: "{{.*}}link{{(.exe)?}}"
 // CHECK-WINDOWS-I386: "{{.*}}clang_rt.profile{{(-i386)?}}.lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-pc-win32 -fprofile-instr-generate \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-pc-win32 -fprofile-instr-generate \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=CHECK-WINDOWS-X86-64 %s
 //
 // CHECK-WINDOWS-X86-64: "{{.*}}link{{(.exe)?}}"
 // CHECK-WINDOWS-X86-64: "{{.*}}clang_rt.profile{{(-x86_64)?}}.lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-mingw32 -fprofile-instr-generate -fuse-ld=ld \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-mingw32 -fprofile-instr-generate -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:   | FileCheck --check-prefix=CHECK-MINGW-X86-64 %s
 //
@@ -132,13 +132,13 @@
 
 // Test instrumented profiling dependent-lib flags
 //
-// RUN: %clang %s -### -o %t.o -target x86_64-pc-win32 \
+// RUN: %clang -### %s --target=x86_64-pc-win32 \
 // RUN:     -fprofile-instr-generate 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-WINDOWS-X86-64-DEPENDENT-LIB %s
 //
 // CHECK-WINDOWS-X86-64-DEPENDENT-LIB: "--dependent-lib={{[^"]*}}clang_rt.profile{{[^"]*}}.lib"
 //
-// RUN: %clang %s -### -o %t.o -target x86_64-mingw32 \
+// RUN: %clang -### %s --target=x86_64-mingw32 \
 // RUN:     -fprofile-instr-generate 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-MINGW-X86-64-DEPENDENT-LIB %s
 //
diff --git a/clang/test/Driver/linux-header-search.cpp b/clang/test/Driver/linux-header-search.cpp
index ced20ed0..6f89985 100644
--- a/clang/test/Driver/linux-header-search.cpp
+++ b/clang/test/Driver/linux-header-search.cpp
@@ -3,70 +3,70 @@
 //
 // Test a simulated installation of libc++ on Linux, both through sysroot and
 // the installation path of Clang.
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu \
 // RUN:     -stdlib=libc++ \
 // RUN:     -ccc-install-dir %S/Inputs/basic_linux_tree/usr/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libcxx_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-BASIC-LIBCXX-SYSROOT %s
-// CHECK-BASIC-LIBCXX-SYSROOT: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-BASIC-LIBCXX-SYSROOT: "-cc1"
 // CHECK-BASIC-LIBCXX-SYSROOT: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-BASIC-LIBCXX-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/include/x86_64-unknown-linux-gnu/c++/v1"
 // CHECK-BASIC-LIBCXX-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/include/c++/v1"
 // CHECK-BASIC-LIBCXX-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/local/include"
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu \
 // RUN:     -stdlib=libc++ \
 // RUN:     -ccc-install-dir %S/Inputs/basic_linux_libcxx_tree/usr/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libcxx_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-BASIC-LIBCXX-INSTALL %s
-// CHECK-BASIC-LIBCXX-INSTALL: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-BASIC-LIBCXX-INSTALL: "-cc1"
 // CHECK-BASIC-LIBCXX-INSTALL: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-BASIC-LIBCXX-INSTALL: "-internal-isystem" "[[SYSROOT]]/usr/bin/../include/x86_64-unknown-linux-gnu/c++/v1"
 // CHECK-BASIC-LIBCXX-INSTALL: "-internal-isystem" "[[SYSROOT]]/usr/bin/../include/c++/v1"
 // CHECK-BASIC-LIBCXX-INSTALL: "-internal-isystem" "[[SYSROOT]]/usr/local/include"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu \
 // RUN:     -stdlib=libc++ \
 // RUN:     -ccc-install-dir %S/Inputs/basic_linux_tree/usr/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libcxxv2_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-BASIC-LIBCXXV2-SYSROOT %s
-// CHECK-BASIC-LIBCXXV2-SYSROOT: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-BASIC-LIBCXXV2-SYSROOT: "-cc1"
 // CHECK-BASIC-LIBCXXV2-SYSROOT: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-BASIC-LIBCXXV2-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/include/x86_64-unknown-linux-gnu/c++/v2"
 // CHECK-BASIC-LIBCXXV2-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/include/c++/v2"
 // CHECK-BASIC-LIBCXXV2-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/local/include"
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu \
 // RUN:     -stdlib=libc++ \
 // RUN:     -ccc-install-dir %S/Inputs/basic_linux_libcxxv2_tree/usr/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libcxxv2_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-BASIC-LIBCXXV2-INSTALL %s
-// CHECK-BASIC-LIBCXXV2-INSTALL: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-BASIC-LIBCXXV2-INSTALL: "-cc1"
 // CHECK-BASIC-LIBCXXV2-INSTALL: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-BASIC-LIBCXXV2-INSTALL: "-internal-isystem" "[[SYSROOT]]/usr/bin/../include/x86_64-unknown-linux-gnu/c++/v2"
 // CHECK-BASIC-LIBCXXV2-INSTALL: "-internal-isystem" "[[SYSROOT]]/usr/bin/../include/c++/v2"
 // CHECK-BASIC-LIBCXXV2-INSTALL: "-internal-isystem" "[[SYSROOT]]/usr/local/include"
 //
 // Test Linux with both libc++ and libstdc++ installed.
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu \
 // RUN:     -stdlib=libc++ \
 // RUN:     -ccc-install-dir %S/Inputs/basic_linux_tree/usr/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libstdcxx_libcxxv2_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-BASIC-LIBSTDCXX-LIBCXXV2-SYSROOT %s
-// CHECK-BASIC-LIBSTDCXX-LIBCXXV2-SYSROOT: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-BASIC-LIBSTDCXX-LIBCXXV2-SYSROOT: "-cc1"
 // CHECK-BASIC-LIBSTDCXX-LIBCXXV2-SYSROOT: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-BASIC-LIBSTDCXX-LIBCXXV2-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/include/x86_64-unknown-linux-gnu/c++/v2"
 // CHECK-BASIC-LIBSTDCXX-LIBCXXV2-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/include/c++/v2"
@@ -74,12 +74,12 @@
 
 // Test Gentoo's weirdness both before and after they changed it in their GCC
 // 4.6.4 release.
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/gentoo_linux_gcc_4.6.2_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-GENTOO-4-6-2 %s
-// CHECK-GENTOO-4-6-2: "{{.*}}clang{{.*}}" "-cc1"
+// CHECK-GENTOO-4-6-2: "-cc1"
 // CHECK-GENTOO-4-6-2: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-GENTOO-4-6-2: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-GENTOO-4-6-2: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/x86_64-pc-linux-gnu/4.6.2/include/g++-v4"
@@ -89,12 +89,12 @@
 // CHECK-GENTOO-4-6-2: "-internal-isystem" "[[SYSROOT]]/usr/local/include"
 // CHECK-GENTOO-4-6-2: "-internal-externc-isystem" "[[SYSROOT]]/include"
 // CHECK-GENTOO-4-6-2: "-internal-externc-isystem" "[[SYSROOT]]/usr/include"
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/gentoo_linux_gcc_4.6.4_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-GENTOO-4-6-4 %s
-// CHECK-GENTOO-4-6-4: "{{.*}}clang{{.*}}" "-cc1"
+// CHECK-GENTOO-4-6-4: "-cc1"
 // CHECK-GENTOO-4-6-4: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-GENTOO-4-6-4: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-GENTOO-4-6-4: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/x86_64-pc-linux-gnu/4.6.4/include/g++-v4.6"
@@ -104,12 +104,12 @@
 // CHECK-GENTOO-4-6-4: "-internal-isystem" "[[SYSROOT]]/usr/local/include"
 // CHECK-GENTOO-4-6-4: "-internal-externc-isystem" "[[SYSROOT]]/include"
 // CHECK-GENTOO-4-6-4: "-internal-externc-isystem" "[[SYSROOT]]/usr/include"
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/gentoo_linux_gcc_4.9.3_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-GENTOO-4-9-3 %s
-// CHECK-GENTOO-4-9-3: "{{.*}}clang{{.*}}" "-cc1"
+// CHECK-GENTOO-4-9-3: "-cc1"
 // CHECK-GENTOO-4-9-3: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-GENTOO-4-9-3: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-GENTOO-4-9-3: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/x86_64-pc-linux-gnu/4.9.3/include/g++-v4.9.3"
@@ -123,19 +123,19 @@
 // Test support for Gentoo's gcc-config -- clang should prefer the older
 // (4.9.3) version over the newer (5.4.0) due to preference specified
 // in /etc/env.d/gcc/x86_64-pc-linux-gnu.
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/gentoo_linux_gcc_multi_version_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-GENTOO-4-9-3 %s
 //
 // Test that gcc-config support does not break multilib.
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnux32 -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnux32 -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/gentoo_linux_gcc_multi_version_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-GENTOO-4-9-3-X32 %s
-// CHECK-GENTOO-4-9-3-X32: "{{.*}}clang{{.*}}" "-cc1"
+// CHECK-GENTOO-4-9-3-X32: "-cc1"
 // CHECK-GENTOO-4-9-3-X32: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-GENTOO-4-9-3-X32: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-GENTOO-4-9-3-X32: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/x86_64-pc-linux-gnu/4.9.3/include/g++-v4.9.3"
@@ -146,12 +146,12 @@
 // CHECK-GENTOO-4-9-3-X32: "-internal-externc-isystem" "[[SYSROOT]]/include"
 // CHECK-GENTOO-4-9-3-X32: "-internal-externc-isystem" "[[SYSROOT]]/usr/include"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target i386-unknown-linux-gnu -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=i386-unknown-linux-gnu -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/gentoo_linux_gcc_multi_version_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-GENTOO-4-9-3-32 %s
-// CHECK-GENTOO-4-9-3-32: "{{.*}}clang{{.*}}" "-cc1"
+// CHECK-GENTOO-4-9-3-32: "-cc1"
 // CHECK-GENTOO-4-9-3-32: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-GENTOO-4-9-3-32: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-GENTOO-4-9-3-32: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/x86_64-pc-linux-gnu/4.9.3/include/g++-v4.9.3"
@@ -166,13 +166,13 @@
 // /etc/env.d/gcc/config-x86_64-pc-linux-gnu file to find CURRENT gcc used.
 // Then should pick the multilibs from version 4.9.x specified in
 // /etc/env.d/gcc/x86_64-pc-linux-gnu-4.9.3.
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/gentoo_linux_gcc_4.9.x_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-GENTOO-4-9-X %s
 //
-// CHECK-GENTOO-4-9-X: "{{.*}}clang{{.*}}" "-cc1"
+// CHECK-GENTOO-4-9-X: "-cc1"
 // CHECK-GENTOO-4-9-X: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-GENTOO-4-9-X: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-GENTOO-4-9-X: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/x86_64-pc-linux-gnu/4.9.x/include/g++-v4.9.3"
@@ -183,12 +183,12 @@
 // CHECK-GENTOO-4-9-X: "-internal-externc-isystem" "[[SYSROOT]]/include"
 // CHECK-GENTOO-4-9-X: "-internal-externc-isystem" "[[SYSROOT]]/usr/include"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnux32 -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnux32 -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/gentoo_linux_gcc_4.9.x_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-GENTOO-4-9-X-X32 %s
-// CHECK-GENTOO-4-9-X-X32: "{{.*}}clang{{.*}}" "-cc1"
+// CHECK-GENTOO-4-9-X-X32: "-cc1"
 // CHECK-GENTOO-4-9-X-X32: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-GENTOO-4-9-X-X32: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-GENTOO-4-9-X-X32: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/x86_64-pc-linux-gnu/4.9.x/include/g++-v4.9.3"
@@ -199,12 +199,12 @@
 // CHECK-GENTOO-4-9-X-X32: "-internal-externc-isystem" "[[SYSROOT]]/include"
 // CHECK-GENTOO-4-9-X-X32: "-internal-externc-isystem" "[[SYSROOT]]/usr/include"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target i386-unknown-linux-gnu -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=i386-unknown-linux-gnu -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/gentoo_linux_gcc_4.9.x_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-GENTOO-4-9-X-32 %s
-// CHECK-GENTOO-4-9-X-32: "{{.*}}clang{{.*}}" "-cc1"
+// CHECK-GENTOO-4-9-X-32: "-cc1"
 // CHECK-GENTOO-4-9-X-32: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-GENTOO-4-9-X-32: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-GENTOO-4-9-X-32: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/x86_64-pc-linux-gnu/4.9.x/include/g++-v4.9.3"
@@ -216,12 +216,12 @@
 // CHECK-GENTOO-4-9-X-32: "-internal-externc-isystem" "[[SYSROOT]]/usr/include"
 //
 // Check header search on Debian 6 / MIPS64
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target mips64-unknown-linux-gnuabi64 -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=mips64-unknown-linux-gnuabi64 -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/debian_6_mips64_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64-GNUABI %s
-// CHECK-MIPS64-GNUABI: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-MIPS64-GNUABI: "-cc1"
 // CHECK-MIPS64-GNUABI: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-MIPS64-GNUABI: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-MIPS64-GNUABI: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/mips64-linux-gnuabi64/4.9/../../../../include/c++/4.9"
@@ -234,12 +234,12 @@
 // CHECK-MIPS64-GNUABI: "-internal-externc-isystem" "[[SYSROOT]]/usr/include"
 //
 // Check header search on Debian 6 / MIPS64
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target mips64el-unknown-linux-gnuabi64 -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=mips64el-unknown-linux-gnuabi64 -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/debian_6_mips64_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64EL-GNUABI %s
-// CHECK-MIPS64EL-GNUABI: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-MIPS64EL-GNUABI: "-cc1"
 // CHECK-MIPS64EL-GNUABI: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-MIPS64EL-GNUABI: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-MIPS64EL-GNUABI: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/mips64el-linux-gnuabi64/4.9/../../../../include/c++/4.9"
@@ -253,37 +253,37 @@
 
 
 // Check header search on OpenEmbedded ARM.
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target arm-oe-linux-gnueabi -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=arm-oe-linux-gnueabi -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/openembedded_arm_linux_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-OE-ARM %s
 
-// CHECK-OE-ARM: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-OE-ARM: "-cc1"
 // CHECK-OE-ARM: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-OE-ARM: "-internal-isystem" "[[SYSROOT]]/usr/lib/arm-oe-linux-gnueabi/6.3.0/../../../include/c++/6.3.0"
 // CHECK-OE-ARM: "-internal-isystem" "[[SYSROOT]]/usr/lib/arm-oe-linux-gnueabi/6.3.0/../../../include/c++/6.3.0/backward"
 
 // Check header search on OpenEmbedded AArch64.
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target aarch64-oe-linux -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=aarch64-oe-linux -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/openembedded_aarch64_linux_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-OE-AARCH64 %s
 
-// CHECK-OE-AARCH64: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-OE-AARCH64: "-cc1"
 // CHECK-OE-AARCH64: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-OE-AARCH64: "-internal-isystem" "[[SYSROOT]]/usr/lib64/aarch64-oe-linux/6.3.0/../../../include/c++/6.3.0"
 // CHECK-OE-AARCH64: "-internal-isystem" "[[SYSROOT]]/usr/lib64/aarch64-oe-linux/6.3.0/../../../include/c++/6.3.0/backward"
 
 // Check header search with Cray's gcc package.
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu -stdlib=libstdc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu -stdlib=libstdc++ \
 // RUN:     --sysroot=%S/Inputs/cray_suse_gcc_tree \
 // RUN:     --gcc-toolchain="%S/Inputs/cray_suse_gcc_tree/opt/gcc/8.2.0/snos" \
 // RUN:   | FileCheck --check-prefix=CHECK-CRAY-X86 %s
 
-// CHECK-CRAY-X86: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-CRAY-X86: "-cc1"
 // CHECK-CRAY-X86: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-CRAY-X86: "-internal-isystem" "[[SYSROOT]]/opt/gcc/8.2.0/snos/lib/gcc/x86_64-suse-linux/8.2.0/../../../../include/g++"
 // CHECK-CRAY-X86: "-internal-isystem" "[[SYSROOT]]/opt/gcc/8.2.0/snos/lib/gcc/x86_64-suse-linux/8.2.0/../../../../include/g++/backward"
diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c
index ef3751d..d7c5843 100644
--- a/clang/test/Driver/linux-ld.c
+++ b/clang/test/Driver/linux-ld.c
@@ -2,7 +2,7 @@
 // General tests that ld invocations on Linux targets sane. Note that we use
 // sysroot to make these tests independent of the host system.
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i386-unknown-linux -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -15,7 +15,7 @@
 // CHECK-LD-32: "-L[[SYSROOT]]/lib"
 // CHECK-LD-32: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -34,7 +34,7 @@
 // CHECK-LD-64: "-lc"
 // CHECK-LD-64: "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux-gnux32 -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -48,7 +48,7 @@
 // CHECK-LD-X32: "-lc"
 // CHECK-LD-X32: "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --gcc-toolchain="" \
@@ -71,7 +71,7 @@
 // CHECK-LD-RT: libclang_rt.builtins-x86_64.a"
 // CHECK-LD-RT: "[[RESDIR]]{{/|\\\\}}lib{{/|\\\\}}linux{{/|\\\\}}clang_rt.crtend-x86_64.o"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i686-unknown-linux \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --gcc-toolchain="" \
@@ -94,7 +94,7 @@
 // CHECK-LD-RT-I686: libclang_rt.builtins-i386.a"
 // CHECK-LD-RT-I686: "[[RESDIR]]{{/|\\\\}}lib{{/|\\\\}}linux{{/|\\\\}}clang_rt.crtend-i386.o"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-androideabi \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
@@ -109,7 +109,7 @@
 // CHECK-LD-RT-ANDROID: "-lc"
 // CHECK-LD-RT-ANDROID: libclang_rt.builtins-arm-android.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -128,7 +128,7 @@
 // CHECK-LD-GCC: "-lc"
 // CHECK-LD-GCC: "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     -static-libgcc \
 // RUN:     --gcc-toolchain="" \
@@ -148,7 +148,7 @@
 // CHECK-LD-64-STATIC-LIBGCC: "-lc"
 // CHECK-LD-64-STATIC-LIBGCC: "-lgcc" "-lgcc_eh"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -158,7 +158,7 @@
 // CHECK-CLANG-NO-LIBGCC: "-lc"
 // CHECK-CLANG-NO-LIBGCC: "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed"
 //
-// RUN: %clangxx -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clangxx -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -168,7 +168,7 @@
 // CHECK-CLANGXX-NO-LIBGCC: "-lc"
 // CHECK-CLANGXX-NO-LIBGCC: "-lgcc_s" "-lgcc"
 //
-// RUN: %clang -static -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -static -### %s 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -176,7 +176,7 @@
 // CHECK-CLANG-NO-LIBGCC-STATIC: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]"
 // CHECK-CLANG-NO-LIBGCC-STATIC: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group"
 //
-// RUN: %clang -static-pie -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -static-pie -### %s 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -191,7 +191,7 @@
 // CHECK-CLANG-LD-STATIC-PIE: "{{.*}}rcrt1.o"
 // CHECK-CLANG-LD-STATIC-PIE: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group"
 //
-// RUN: %clang -static-pie -pie -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -static-pie -pie -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -206,7 +206,7 @@
 // CHECK-CLANG-LD-STATIC-PIE-PIE: "{{.*}}rcrt1.o"
 // CHECK-CLANG-LD-STATIC-PIE-PIE: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group"
 //
-// RUN: %clang -static-pie -static -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -static-pie -static -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -221,14 +221,14 @@
 // CHECK-CLANG-LD-STATIC-PIE-STATIC: "{{.*}}rcrt1.o"
 // CHECK-CLANG-LD-STATIC-PIE-STATIC: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group"
 //
-// RUN: %clang -static-pie -nopie -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -static-pie -nopie -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CLANG-LD-STATIC-PIE-NOPIE %s
 // CHECK-CLANG-LD-STATIC-PIE-NOPIE: error: cannot specify 'nopie' along with 'static-pie'
 //
-// RUN: %clang -dynamic -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -dynamic -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -238,7 +238,7 @@
 // CHECK-CLANG-NO-LIBGCC-DYNAMIC: "-lc"
 // CHECK-CLANG-NO-LIBGCC-DYNAMIC: "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed"
 //
-// RUN: %clang -static-libgcc -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -static-libgcc -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -248,7 +248,7 @@
 // CHECK-CLANG-STATIC-LIBGCC: "-lc"
 // CHECK-CLANG-STATIC-LIBGCC: "-lgcc" "-lgcc_eh"
 //
-// RUN: %clang -static-libgcc -dynamic -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -static-libgcc -dynamic -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -258,7 +258,7 @@
 // CHECK-CLANG-STATIC-LIBGCC-DYNAMIC: "-lc"
 // CHECK-CLANG-STATIC-LIBGCC-DYNAMIC: "-lgcc" "-lgcc_eh"
 //
-// RUN: %clang -shared-libgcc -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -shared-libgcc -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -268,7 +268,7 @@
 // CHECK-CLANG-SHARED-LIBGCC: "-lc"
 // CHECK-CLANG-SHARED-LIBGCC: "-lgcc_s" "-lgcc"
 //
-// RUN: %clang -shared-libgcc -dynamic -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -shared-libgcc -dynamic -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -277,7 +277,7 @@
 // CHECK-CLANG-SHARED-LIBGCC-DYNAMIC: "-lc"
 // CHECK-CLANG-SHARED-LIBGCC-DYNAMIC: "-lgcc_s" "-lgcc"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -285,7 +285,7 @@
 // CHECK-CLANG-ANDROID-NONE: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]"
 // CHECK-CLANG-ANDROID-NONE: "-l:libunwind.a" "-ldl" "-lc"
 //
-// RUN: %clang -shared -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -shared -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -293,7 +293,7 @@
 // CHECK-CLANG-ANDROID-SHARED: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]"
 // CHECK-CLANG-ANDROID-SHARED: "-l:libunwind.a" "-ldl" "-lc"
 //
-// RUN: %clang -static -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -static -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -301,7 +301,7 @@
 // CHECK-CLANG-ANDROID-STATIC: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]"
 // CHECK-CLANG-ANDROID-STATIC: "--start-group" "{{[^"]*}}{{/|\\\\}}libclang_rt.builtins-aarch64-android.a" "-l:libunwind.a" "-lc" "--end-group"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1      \
+// RUN: %clang -### %s 2>&1      \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     -static \
 // RUN:     --gcc-toolchain="" \
@@ -327,14 +327,14 @@
 // CHECK-LD-SHARED-STATIC: "{{.*}}/usr/lib/gcc/x86_64-unknown-linux/10.2.0{{/|\\\\}}crtendS.o"
 
 // Check that flags can be combined. The -static dominates.
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \
 // RUN:     -static-libgcc -static \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LD-64-STATIC %s
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i386-unknown-linux -rtlib=platform -m32 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/multilib_32bit_linux_tree \
@@ -350,7 +350,7 @@
 // CHECK-32-TO-32: "-L[[SYSROOT]]/lib"
 // CHECK-32-TO-32: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i386-unknown-linux -rtlib=platform -m64 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/multilib_32bit_linux_tree \
@@ -366,7 +366,7 @@
 // CHECK-32-TO-64: "-L[[SYSROOT]]/lib"
 // CHECK-32-TO-64: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform -m64 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/multilib_64bit_linux_tree \
@@ -382,7 +382,7 @@
 // CHECK-64-TO-64: "-L[[SYSROOT]]/lib"
 // CHECK-64-TO-64: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=plaform -m32 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/multilib_64bit_linux_tree \
@@ -398,7 +398,7 @@
 // CHECK-64-TO-32: "-L[[SYSROOT]]/lib"
 // CHECK-64-TO-32: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux-gnux32 -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/multilib_64bit_linux_tree \
@@ -414,7 +414,7 @@
 // CHECK-X32: "-L[[SYSROOT]]/lib"
 // CHECK-X32: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform -mx32 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/multilib_64bit_linux_tree \
@@ -430,7 +430,7 @@
 // CHECK-64-TO-X32: "-L[[SYSROOT]]/lib"
 // CHECK-64-TO-X32: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i386-unknown-linux -rtlib=platform -mx32 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/multilib_64bit_linux_tree \
@@ -446,7 +446,7 @@
 // CHECK-32-TO-X32: "-L[[SYSROOT]]/lib"
 // CHECK-32-TO-X32: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux-gnux32 -rtlib=platform -m64 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/multilib_64bit_linux_tree \
@@ -462,7 +462,7 @@
 // CHECK-X32-TO-64: "-L[[SYSROOT]]/lib"
 // CHECK-X32-TO-64: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux-gnux32 -rtlib=platform -m32 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/multilib_64bit_linux_tree \
@@ -478,7 +478,7 @@
 // CHECK-X32-TO-32: "-L[[SYSROOT]]/lib"
 // CHECK-X32-TO-32: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=platform -m32 \
 // RUN:     --gcc-toolchain=%S/Inputs/multilib_64bit_linux_tree/usr \
 // RUN:     --sysroot=%S/Inputs/multilib_32bit_linux_tree \
@@ -493,7 +493,7 @@
 //
 // Check that we support unusual patch version formats, including missing that
 // component.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i386-unknown-linux -rtlib=platform -m32 \
 // RUN:     -ccc-install-dir %S/Inputs/gcc_version_parsing1/bin \
 // RUN:     --gcc-toolchain="" \
@@ -504,28 +504,28 @@
 
 // Test a simulated installation of libc++ on Linux, both through sysroot and
 // the installation path of Clang.
-// RUN: %clangxx -no-canonical-prefixes -x c++ %s -no-pie -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu \
+// RUN: %clangxx -x c++ -### %s -no-pie 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu \
 // RUN:     -stdlib=libc++ \
 // RUN:     -ccc-install-dir %S/Inputs/basic_linux_tree/usr/bin \
 // RUN:     --gcc-toolchain="" \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libcxx_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BASIC-LIBCXX-SYSROOT %s
-// CHECK-BASIC-LIBCXX-SYSROOT: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-BASIC-LIBCXX-SYSROOT: "-cc1"
 // CHECK-BASIC-LIBCXX-SYSROOT: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-BASIC-LIBCXX-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/include/c++/v1"
 // CHECK-BASIC-LIBCXX-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/local/include"
 // CHECK-BASIC-LIBCXX-SYSROOT: "--sysroot=[[SYSROOT]]"
-// RUN: %clang -no-canonical-prefixes -x c++ %s -no-pie -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu \
+// RUN: %clang -x c++ -### %s -no-pie 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu \
 // RUN:     -stdlib=libc++ \
 // RUN:     -ccc-install-dir %S/Inputs/basic_linux_libcxx_tree/usr/bin \
 // RUN:     --gcc-toolchain="" \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libcxx_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BASIC-LIBCXX-INSTALL %s
-// CHECK-BASIC-LIBCXX-INSTALL: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-BASIC-LIBCXX-INSTALL: "-cc1"
 // CHECK-BASIC-LIBCXX-INSTALL: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-BASIC-LIBCXX-INSTALL: "-internal-isystem" "[[SYSROOT]]/usr/bin/../include/c++/v1"
 // CHECK-BASIC-LIBCXX-INSTALL: "-internal-isystem" "[[SYSROOT]]/usr/local/include"
@@ -533,22 +533,22 @@
 //
 // Test that we can use -stdlib=libc++ in a build system even when it
 // occasionally links C code instead of C++ code.
-// RUN: %clang -no-canonical-prefixes -x c %s -no-pie -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnu \
+// RUN: %clang -x c -### %s -no-pie 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnu \
 // RUN:     -stdlib=libc++ \
 // RUN:     -ccc-install-dir %S/Inputs/basic_linux_libcxx_tree/usr/bin \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libcxx_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BASIC-LIBCXX-C-LINK %s
 // CHECK-BASIC-LIBCXX-C-LINK-NOT: warning:
-// CHECK-BASIC-LIBCXX-C-LINK: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-BASIC-LIBCXX-C-LINK: "-cc1"
 // CHECK-BASIC-LIBCXX-C-LINK: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-BASIC-LIBCXX-C-LINK-NOT: "-internal-isystem" "[[SYSROOT]]/usr/bin/../include/c++/v1"
 // CHECK-BASIC-LIBCXX-C-LINK: "-internal-isystem" "[[SYSROOT]]/usr/local/include"
 // CHECK-BASIC-LIBCXX-C-LINK: "--sysroot=[[SYSROOT]]"
 //
 // Check multi arch support on Ubuntu 12.04 LTS.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-unknown-linux-gnueabihf -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/ubuntu_12.04_LTS_multiarch_tree \
@@ -564,7 +564,7 @@
 // CHECK-UBUNTU-12-04-ARM-HF: "{{.*}}/usr/lib/arm-linux-gnueabihf{{/|\\\\}}crtn.o"
 //
 // Check Ubuntu 13.10 on x86-64 targeting arm-linux-gnueabihf.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-gnueabihf -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/x86-64_ubuntu_13.10 \
@@ -583,7 +583,7 @@
 // CHECK-X86-64-UBUNTU-13-10-ARM-HF: "{{.*}}/usr/lib/gcc-cross/arm-linux-gnueabihf/4.8/../../../../arm-linux-gnueabihf/lib/../lib{{/|\\\\}}crtn.o"
 //
 // Check Ubuntu 13.10 on x86-64 targeting arm-linux-gnueabi.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-gnueabi -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/x86-64_ubuntu_13.10 \
@@ -602,7 +602,7 @@
 // CHECK-X86-64-UBUNTU-13-10-ARM: "{{.*}}/usr/lib/gcc-cross/arm-linux-gnueabi/4.7/../../../../arm-linux-gnueabi/lib/../lib{{/|\\\\}}crtn.o"
 //
 // Check Ubuntu 14.04 on powerpc64le.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc64le-unknown-linux-gnu -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/ubuntu_14.04_multiarch_tree \
@@ -619,7 +619,7 @@
 //
 // Check Ubuntu 14.04 on x32.
 // "/usr/lib/gcc/x86_64-linux-gnu/4.8/x32/crtend.o" "/usr/lib/gcc/x86_64-linux-gnu/4.8/../../../../libx32/crtn.o"
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux-gnux32 -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/ubuntu_14.04_multiarch_tree \
@@ -636,7 +636,7 @@
 // CHECK-UBUNTU-14-04-X32: "{{.*}}/usr/lib/gcc/x86_64-linux-gnu/4.8/../../../../libx32{{/|\\\\}}crtn.o"
 //
 // Check fedora 18 on arm.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armv7-unknown-linux-gnueabihf -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/fedora_18_tree \
@@ -651,12 +651,12 @@
 // CHECK-FEDORA-18-ARM-HF: "{{.*}}/usr/lib/gcc/armv7hl-redhat-linux-gnueabi/4.7.2/../../../../lib{{/|\\\\}}crtn.o"
 //
 // Check Fedora 21 on AArch64.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm64-unknown-linux-gnu -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/fedora_21_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-FEDORA-21-AARCH64 %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-unknown-linux-gnu -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/fedora_21_tree \
@@ -671,7 +671,7 @@
 // CHECK-FEDORA-21-AARCH64: "{{.*}}/usr/lib/gcc/aarch64-redhat-linux/4.9.0/../../../../lib64{{/|\\\\}}crtn.o"
 //
 // Check Fedora 31 on riscv64.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=riscv64-redhat-linux -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/fedora_31_riscv64_tree \
@@ -685,7 +685,7 @@
 // CHECK-FEDORA-31-RISCV64: "{{.*}}/usr/lib/gcc/riscv64-redhat-linux/9{{/|\\\\}}crtend.o"
 // CHECK-FEDORA-31-RISCV64: "{{.*}}/usr/lib/gcc/riscv64-redhat-linux/9{{/|\\\\}}crtn.o"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-unknown-linux-gnueabi -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/ubuntu_12.04_LTS_multiarch_tree \
@@ -701,7 +701,7 @@
 // CHECK-UBUNTU-12-04-ARM: "{{.*}}/usr/lib/arm-linux-gnueabi{{/|\\\\}}crtn.o"
 //
 // Test the setup that shipped in SUSE 10.3 on ppc64.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc64-suse-linux -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/suse_10.3_ppc64_tree \
@@ -714,12 +714,12 @@
 // CHECK-SUSE-10-3-PPC64: "-L[[SYSROOT]]/usr/lib/../lib64"
 //
 // Check openSuse Leap 42.2 on AArch64
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm64-unknown-linux-gnu -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/opensuse_42.2_aarch64_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-OPENSUSE-42-2-AARCH64 %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-unknown-linux-gnu -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/opensuse_42.2_aarch64_tree \
@@ -734,12 +734,12 @@
 // CHECK-OPENSUSE-42-2-AARCH64: "{{.*}}/usr/lib64/gcc/aarch64-suse-linux/4.8/../../../../lib64{{/|\\\\}}crtn.o"
 //
 // Check openSUSE Tumbleweed on armv6hl
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armv6hl-suse-linux-gnueabi -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/opensuse_tumbleweed_armv6hl_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-OPENSUSE-TW-ARMV6HL %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armv6hl-suse-linux-gnueabi -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/opensuse_tumbleweed_armv6hl_tree \
@@ -754,12 +754,12 @@
 // CHECK-OPENSUSE-TW-ARMV6HL: "{{.*}}/usr/lib/gcc/armv6hl-suse-linux-gnueabi/5/../../../../lib{{/|\\\\}}crtn.o"
 //
 // Check openSUSE Tumbleweed on armv7hl
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armv7hl-suse-linux-gnueabi -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/opensuse_tumbleweed_armv7hl_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-OPENSUSE-TW-ARMV7HL %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armv7hl-suse-linux-gnueabi -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/opensuse_tumbleweed_armv7hl_tree \
@@ -774,12 +774,12 @@
 // CHECK-OPENSUSE-TW-ARMV7HL: "{{.*}}/usr/lib/gcc/armv7hl-suse-linux-gnueabi/5/../../../../lib{{/|\\\\}}crtn.o"
 //
 // Check openSUSE Tumbleweed on riscv64
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=riscv64-suse-linux -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/opensuse_tumbleweed_riscv64_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-OPENSUSE-TW-RISCV64 %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=riscv64-suse-linux -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/opensuse_tumbleweed_riscv64_tree \
@@ -794,7 +794,7 @@
 // CHECK-OPENSUSE-TW-RISCV64: "{{.*}}/usr/lib64/gcc/riscv64-suse-linux/9/../../../../lib64{{/|\\\\}}crtn.o"
 //
 // Check openSUSE Tumbleweed on ppc
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc-unknown-linux-gnu -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/opensuse_tumbleweed_ppc_tree \
@@ -808,63 +808,63 @@
 // CHECK-OPENSUSE-TW-PPC: "{{.*}}/usr/lib/crtn.o"
 //
 // Check dynamic-linker for different archs
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-gnueabi \
 // RUN:   | FileCheck --check-prefix=CHECK-ARM %s
 // CHECK-ARM: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ARM: "-m" "armelf_linux_eabi"
 // CHECK-ARM: "-dynamic-linker" "{{.*}}/lib/ld-linux.so.3"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-gnueabi -mfloat-abi=hard \
 // RUN:   | FileCheck --check-prefix=CHECK-ARM-ABIHF %s
 // CHECK-ARM-ABIHF: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ARM-ABIHF: "-m" "armelf_linux_eabi"
 // CHECK-ARM-ABIHF: "-dynamic-linker" "{{.*}}/lib/ld-linux-armhf.so.3"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-gnueabihf \
 // RUN:   | FileCheck --check-prefix=CHECK-ARM-HF %s
 // CHECK-ARM-HF: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ARM-HF: "-m" "armelf_linux_eabi"
 // CHECK-ARM-HF: "-dynamic-linker" "{{.*}}/lib/ld-linux-armhf.so.3"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc64-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-PPC64 %s
 // CHECK-PPC64: "{{.*}}ld{{(.exe)?}}"
 // CHECK-PPC64: "-m" "elf64ppc"
 // CHECK-PPC64: "-dynamic-linker" "{{.*}}/lib{{(64)?}}/ld64.so.1"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc64-linux-gnu -mabi=elfv1 \
 // RUN:   | FileCheck --check-prefix=CHECK-PPC64-ELFv1 %s
 // CHECK-PPC64-ELFv1: "{{.*}}ld{{(.exe)?}}"
 // CHECK-PPC64-ELFv1: "-m" "elf64ppc"
 // CHECK-PPC64-ELFv1: "-dynamic-linker" "{{.*}}/lib{{(64)?}}/ld64.so.1"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc64-linux-gnu -mabi=elfv2 \
 // RUN:   | FileCheck --check-prefix=CHECK-PPC64-ELFv2 %s
 // CHECK-PPC64-ELFv2: "{{.*}}ld{{(.exe)?}}"
 // CHECK-PPC64-ELFv2: "-m" "elf64ppc"
 // CHECK-PPC64-ELFv2: "-dynamic-linker" "{{.*}}/lib{{(64)?}}/ld64.so.2"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc64le-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-PPC64LE %s
 // CHECK-PPC64LE: "{{.*}}ld{{(.exe)?}}"
 // CHECK-PPC64LE: "-m" "elf64lppc"
 // CHECK-PPC64LE: "-dynamic-linker" "{{.*}}/lib{{(64)?}}/ld64.so.2"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc64le-linux-gnu -mabi=elfv1 \
 // RUN:   | FileCheck --check-prefix=CHECK-PPC64LE-ELFv1 %s
 // CHECK-PPC64LE-ELFv1: "{{.*}}ld{{(.exe)?}}"
 // CHECK-PPC64LE-ELFv1: "-m" "elf64lppc"
 // CHECK-PPC64LE-ELFv1: "-dynamic-linker" "{{.*}}/lib{{(64)?}}/ld64.so.1"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc64le-linux-gnu -mabi=elfv2 \
 // RUN:   | FileCheck --check-prefix=CHECK-PPC64LE-ELFv2 %s
 // CHECK-PPC64LE-ELFv2: "{{.*}}ld{{(.exe)?}}"
@@ -873,7 +873,7 @@
 //
 // Check that we do not pass --hash-style=gnu or --hash-style=both to
 // hexagon linux linker
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=hexagon-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-HEXAGON %s
 // CHECK-HEXAGON: "{{.*}}{{hexagon-link|ld}}{{(.exe)?}}"
@@ -882,7 +882,7 @@
 // Check that we do not pass --hash-style=gnu and --hash-style=both to linker
 // and provide correct path to the dynamic linker and emulation mode when build
 // for MIPS platforms.
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS %s
 // CHECK-MIPS: "{{.*}}ld{{(.exe)?}}"
@@ -890,7 +890,7 @@
 // CHECK-MIPS: "-dynamic-linker" "{{.*}}/lib/ld.so.1"
 // CHECK-MIPS-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mipsel-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPSEL %s
 // CHECK-MIPSEL: "{{.*}}ld{{(.exe)?}}"
@@ -898,21 +898,21 @@
 // CHECK-MIPSEL: "-dynamic-linker" "{{.*}}/lib/ld.so.1"
 // CHECK-MIPSEL-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 --target=mipsel-linux-gnu -mnan=2008 \
+// RUN: %clang -### %s -no-pie 2>&1 --target=mipsel-linux-gnu -mnan=2008 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPSEL-NAN2008 %s
 // CHECK-MIPSEL-NAN2008: "{{.*}}ld{{(.exe)?}}"
 // CHECK-MIPSEL-NAN2008: "-m" "elf32ltsmip"
 // CHECK-MIPSEL-NAN2008: "-dynamic-linker" "{{.*}}/lib/ld-linux-mipsn8.so.1"
 // CHECK-MIPSEL-NAN2008-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 --target=mipsel-linux-gnu -mcpu=mips32r6 \
+// RUN: %clang -### %s -no-pie 2>&1 --target=mipsel-linux-gnu -mcpu=mips32r6 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS32R6EL %s
 // CHECK-MIPS32R6EL: "{{.*}}ld{{(.exe)?}}"
 // CHECK-MIPS32R6EL: "-m" "elf32ltsmip"
 // CHECK-MIPS32R6EL: "-dynamic-linker" "{{.*}}/lib/ld-linux-mipsn8.so.1"
 // CHECK-MIPS32R6EL-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64 %s
 // CHECK-MIPS64: "{{.*}}ld{{(.exe)?}}"
@@ -920,7 +920,7 @@
 // CHECK-MIPS64: "-dynamic-linker" "{{.*}}/lib{{(64)?}}/ld.so.1"
 // CHECK-MIPS64-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64EL %s
 // CHECK-MIPS64EL: "{{.*}}ld{{(.exe)?}}"
@@ -928,21 +928,21 @@
 // CHECK-MIPS64EL: "-dynamic-linker" "{{.*}}/lib{{(64)?}}/ld.so.1"
 // CHECK-MIPS64EL-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 --target=mips64el-linux-gnu -mnan=2008 \
+// RUN: %clang -### %s -no-pie 2>&1 --target=mips64el-linux-gnu -mnan=2008 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64EL-NAN2008 %s
 // CHECK-MIPS64EL-NAN2008: "{{.*}}ld{{(.exe)?}}"
 // CHECK-MIPS64EL-NAN2008: "-m" "elf64ltsmip"
 // CHECK-MIPS64EL-NAN2008: "-dynamic-linker" "{{.*}}/lib{{(64)?}}/ld-linux-mipsn8.so.1"
 // CHECK-MIPS64EL-NAN2008-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 --target=mips64el-linux-gnu -mcpu=mips64r6 \
+// RUN: %clang -### %s -no-pie 2>&1 --target=mips64el-linux-gnu -mcpu=mips64r6 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64R6EL %s
 // CHECK-MIPS64R6EL: "{{.*}}ld{{(.exe)?}}"
 // CHECK-MIPS64R6EL: "-m" "elf64ltsmip"
 // CHECK-MIPS64R6EL: "-dynamic-linker" "{{.*}}/lib{{(64)?}}/ld-linux-mipsn8.so.1"
 // CHECK-MIPS64R6EL-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64-linux-gnu -mabi=n32 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64-N32 %s
 // CHECK-MIPS64-N32: "{{.*}}ld{{(.exe)?}}"
@@ -950,7 +950,7 @@
 // CHECK-MIPS64-N32: "-dynamic-linker" "{{.*}}/lib{{(32)?}}/ld.so.1"
 // CHECK-MIPS64-N32-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-gnu -mabi=n32 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64EL-N32 %s
 // CHECK-MIPS64EL-N32: "{{.*}}ld{{(.exe)?}}"
@@ -958,14 +958,14 @@
 // CHECK-MIPS64EL-N32: "-dynamic-linker" "{{.*}}/lib{{(32)?}}/ld.so.1"
 // CHECK-MIPS64EL-N32-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 --target=mips64el-linux-gnu -mabi=n32 \
+// RUN: %clang -### %s -no-pie 2>&1 --target=mips64el-linux-gnu -mabi=n32 \
 // RUN:   -mnan=2008 | FileCheck --check-prefix=CHECK-MIPS64EL-N32-NAN2008 %s
 // CHECK-MIPS64EL-N32-NAN2008: "{{.*}}ld{{(.exe)?}}"
 // CHECK-MIPS64EL-N32-NAN2008: "-m" "elf32ltsmipn32"
 // CHECK-MIPS64EL-N32-NAN2008: "-dynamic-linker" "{{.*}}/lib{{(32)?}}/ld-linux-mipsn8.so.1"
 // CHECK-MIPS64EL-N32-NAN2008-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 --target=mips64el-redhat-linux \
+// RUN: %clang -### %s -no-pie 2>&1 --target=mips64el-redhat-linux \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64EL-REDHAT %s
 // CHECK-MIPS64EL-REDHAT: "{{.*}}ld{{(.exe)?}}"
 // CHECK-MIPS64EL-REDHAT: "-m" "elf64ltsmip"
@@ -975,47 +975,47 @@
 
 // Check that we pass --hash-style=both for pre-M Android versions and
 // --hash-style=gnu for newer Android versions.
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armv7-linux-android21 \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-HASH-STYLE-L %s
 // CHECK-ANDROID-HASH-STYLE-L: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ANDROID-HASH-STYLE-L: "--hash-style=both"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armv7-linux-android23 \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-HASH-STYLE-M %s
 // CHECK-ANDROID-HASH-STYLE-M: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ANDROID-HASH-STYLE-M: "--hash-style=gnu"
 
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 --target=mips64-linux-gnuabin32 \
+// RUN: %clang -### %s -no-pie 2>&1 --target=mips64-linux-gnuabin32 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64EL-GNUABIN32 %s
 // CHECK-MIPS64EL-GNUABIN32: "{{.*}}ld{{(.exe)?}}"
 // CHECK-MIPS64EL-GNUABIN32: "-m" "elf32btsmipn32"
 // CHECK-MIPS64EL-GNUABIN32: "-dynamic-linker" "{{.*}}/lib{{(32)?}}/ld.so.1"
 // CHECK-MIPS64EL-GNUABIN32-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 --target=mips64-linux-gnuabi64 \
+// RUN: %clang -### %s -no-pie 2>&1 --target=mips64-linux-gnuabi64 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64EL-GNUABI64 %s
 // CHECK-MIPS64EL-GNUABI64: "{{.*}}ld{{(.exe)?}}"
 // CHECK-MIPS64EL-GNUABI64: "-m" "elf64btsmip"
 // CHECK-MIPS64EL-GNUABI64: "-dynamic-linker" "{{.*}}/lib{{(64)?}}/ld.so.1"
 // CHECK-MIPS64EL-GNUABI64-NOT: "--hash-style={{gnu|both}}"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=sparc-unknown-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-SPARCV8 %s
 // CHECK-SPARCV8: "{{.*}}ld{{(.exe)?}}"
 // CHECK-SPARCV8: "-m" "elf32_sparc"
 // CHECK-SPARCV8: "-dynamic-linker" "{{(/usr/sparc-unknown-linux-gnu)?}}/lib/ld-linux.so.2"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=sparcel-unknown-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-SPARCV8EL %s
 // CHECK-SPARCV8EL: "{{.*}}ld{{(.exe)?}}"
 // CHECK-SPARCV8EL: "-m" "elf32_sparc"
 // CHECK-SPARCV8EL: "-dynamic-linker" "{{(/usr/sparcel-unknown-linux-gnu)?}}/lib/ld-linux.so.2"
 //
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=sparcv9-unknown-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-SPARCV9 %s
 // CHECK-SPARCV9: "{{.*}}ld{{(.exe)?}}"
@@ -1023,42 +1023,42 @@
 // CHECK-SPARCV9: "-dynamic-linker" "{{(/usr/sparcv9-unknown-linux-gnu)?}}/lib{{(64)?}}/ld-linux.so.2"
 
 // Test linker invocation on Android.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-androideabi -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mipsel-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i686-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
@@ -1076,48 +1076,48 @@
 // CHECK-ANDROID-NOT: "-lgcc_s"
 // CHECK-ANDROID-NOT: "-lgcc"
 // CHECK-ANDROID: "{{.*}}{{/|\\\\}}crtend_android.o"
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-androideabi -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-SO %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-SO %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-SO %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-SO %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mipsel-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-SO %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-SO %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i686-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-SO %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
@@ -1134,47 +1134,47 @@
 // CHECK-ANDROID-SO-NOT: "-lgcc_s"
 // CHECK-ANDROID-SO-NOT: "-lgcc"
 // CHECK-ANDROID-SO: "{{.*}}{{/|\\\\}}crtend_so.o"
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-androideabi -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -static \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -static \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -static \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -static \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mipsel-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -static \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -static \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i686-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -static \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
@@ -1190,49 +1190,49 @@
 // CHECK-ANDROID-STATIC-NOT: "-lgcc_eh"
 // CHECK-ANDROID-STATIC-NOT: "-lgcc"
 // CHECK-ANDROID-STATIC: "{{.*}}{{/|\\\\}}crtend_android.o"
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-androideabi -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot  \
 // RUN:     -pie \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -pie \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot  \
 // RUN:     -pie \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot  \
 // RUN:     -pie \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mipsel-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -pie \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -pie \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i686-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -pie \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-linux-android -rtlib=platform --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
@@ -1247,42 +1247,42 @@
 // CHECK-ANDROID-PIE-NOT: "-lgcc_s"
 // CHECK-ANDROID-PIE-NOT: "-lgcc"
 // CHECK-ANDROID-PIE: "{{.*}}{{/|\\\\}}crtend_android.o"
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-androideabi \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-32 %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-android \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-32 %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mipsel-linux-android \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-32 %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-linux-android \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-64 %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm64-linux-android \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-64 %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-android \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-64 %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i686-linux-android \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-32 %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-linux-android \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
@@ -1291,88 +1291,88 @@
 // CHECK-ANDROID-64: "-dynamic-linker" "/system/bin/linker64"
 //
 // Test that -pthread does not add -lpthread on Android.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-androideabi -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm64-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mipsel-linux-android -pthread \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i686-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-androideabi -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm64-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mipsel-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i686-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-linux-android -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
@@ -1380,7 +1380,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s
 // CHECK-ANDROID-PTHREAD-NOT: -lpthread
 //
-// RUN: %clang -no-canonical-prefixes %t.o -no-pie -### -o %t 2>&1 \
+// RUN: %clang %t.o -no-pie -### -o %t 2>&1 \
 // RUN:     --target=arm-linux-androideabi -pthread \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
@@ -1388,7 +1388,7 @@
 // CHECK-ANDROID-PTHREAD-LINK-NOT: argument unused during compilation: '-pthread'
 //
 // Check linker invocation on Debian 6 MIPS 32/64-bit.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mipsel-linux-gnu -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/debian_6_mips_tree \
@@ -1404,7 +1404,7 @@
 // CHECK-DEBIAN-ML-MIPSEL: "-L[[SYSROOT]]/lib"
 // CHECK-DEBIAN-ML-MIPSEL: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-gnu -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/debian_6_mips_tree \
@@ -1420,7 +1420,7 @@
 // CHECK-DEBIAN-ML-MIPS64EL: "-L[[SYSROOT]]/lib"
 // CHECK-DEBIAN-ML-MIPS64EL: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-gnu -rtlib=platform -mabi=n32 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/debian_6_mips_tree \
@@ -1436,7 +1436,7 @@
 // CHECK-DEBIAN-ML-MIPS64EL-N32: "-L[[SYSROOT]]/lib"
 // CHECK-DEBIAN-ML-MIPS64EL-N32: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-gnuabi64 -rtlib=platform -mabi=32 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/debian_6_mips64_tree \
@@ -1452,12 +1452,12 @@
 // CHECK-DEBIAN-ML-MIPS64EL-O32: "-L[[SYSROOT]]/lib"
 // CHECK-DEBIAN-ML-MIPS64EL-O32: "-L[[SYSROOT]]/usr/lib"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64-unknown-linux-gnu --rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/debian_6_mips64_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-DEBIAN-ML-MIPS64-GNUABI %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64-linux-gnuabi64 -rtlib=platform -mabi=n64 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/debian_6_mips64_tree \
@@ -1474,12 +1474,12 @@
 // CHECK-DEBIAN-ML-MIPS64-GNUABI: "{{.*}}/usr/lib/gcc/mips64-linux-gnuabi64/4.9{{/|\\\\}}crtend.o"
 // CHECK-DEBIAN-ML-MIPS64-GNUABI: "{{.*}}/usr/lib/mips64-linux-gnuabi64{{/|\\\\}}crtn.o"
 //
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-unknown-linux-gnu -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/debian_6_mips64_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-DEBIAN-ML-MIPS64EL-GNUABI %s
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-linux-gnuabi64 -rtlib=platform -mabi=n64 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/debian_6_mips64_tree \
@@ -1497,7 +1497,7 @@
 // CHECK-DEBIAN-ML-MIPS64EL-GNUABI: "{{.*}}/usr/lib/mips64el-linux-gnuabi64{{/|\\\\}}crtn.o"
 //
 // Test linker invocation for Freescale SDK (OpenEmbedded).
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc-fsl-linux -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/freescale_ppc_tree \
@@ -1507,7 +1507,7 @@
 // CHECK-FSL-PPC: "{{.*}}{{/|\\\\}}crt1.o"
 // CHECK-FSL-PPC: "{{.*}}{{/|\\\\}}crtbegin.o"
 // CHECK-FSL-PPC: "-L[[SYSROOT]]/usr/lib"
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc64-fsl-linux -rtlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/freescale_ppc64_tree \
@@ -1585,7 +1585,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-u %s
 // CHECK-u: "-u" "asdf"
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armeb-unknown-linux \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -1595,7 +1595,7 @@
 // CHECK-ARMEB: "-EB"
 // CHECK-ARMEB: "-m" "armelfb_linux_eabi"
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armebv7-unknown-linux \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -1605,21 +1605,21 @@
 // CHECK-ARMV7EB: "-EB"
 // CHECK-ARMV7EB: "-m" "armelfb_linux_eabi"
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armv7-unknown-linux \
 // RUN:     -mbig-endian \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ARMV7EB %s
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armebv7-unknown-linux \
 // RUN:     -mbig-endian \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ARMV7EB %s
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armv7-unknown-linux \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -1629,21 +1629,21 @@
 // CHECK-ARMV7EL: "-EL"
 // CHECK-ARMV7EL: "-m" "armelf_linux_eabi"
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armebv7-unknown-linux \
 // RUN:     -mlittle-endian \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ARMV7EL %s
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armv7-unknown-linux \
 // RUN:     -mlittle-endian \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ARMV7EL %s
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64_be-unknown-linux \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -1653,21 +1653,21 @@
 // CHECK-AARCH64BE: "-EB"
 // CHECK-AARCH64BE: "-m" "aarch64linuxb"
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-unknown-linux \
 // RUN:     -mbig-endian \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-AARCH64BE %s
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64_be-unknown-linux \
 // RUN:     -mbig-endian \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-AARCH64BE %s
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-unknown-linux \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -1677,7 +1677,7 @@
 // CHECK-AARCH64LE: "-EL"
 // CHECK-AARCH64LE: "-m" "aarch64linux"
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64_be-unknown-linux \
 // RUN:     -mlittle-endian \
 // RUN:     --gcc-toolchain="" \
@@ -1685,73 +1685,73 @@
 // RUN:   | FileCheck --check-prefix=CHECK-AARCH64LE %s
 
 // Check dynamic-linker for musl-libc
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i386-pc-linux-musl \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-X86 %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-pc-linux-musl \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-X86_64 %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips-pc-linux-musl \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-MIPS %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mipsel-pc-linux-musl \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-MIPSEL %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64-pc-linux-musl \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-MIPS64 %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=mips64el-pc-linux-musl \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-MIPS64EL %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc-pc-linux-musl \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-PPC %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpc64-pc-linux-musl \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-PPC64 %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=powerpcspe-pc-linux-musl \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-PPCSPE %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=thumb-pc-linux-musleabi \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-ARM %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=thumb-pc-linux-musleabihf \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-ARMHF %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=thumbv7-pc-linux-musleabi -mhard-float \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-ARMHF %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=thumbeb-pc-linux-musleabi \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-ARMEB %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=thumbeb-pc-linux-musleabihf \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-ARMEBHF %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=thumbv7eb-pc-linux-musleabi -mhard-float \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-ARMEBHF %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-pc-linux-musleabi \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-ARM %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-pc-linux-musleabihf \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-ARMHF %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armv7-pc-linux-musleabi -mhard-float \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-ARMHF %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armeb-pc-linux-musleabi \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-ARMEB %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armeb-pc-linux-musleabihf \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-ARMEBHF %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=armv7eb-pc-linux-musleabi -mhard-float \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-ARMEBHF %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-pc-linux-musleabi \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-AARCH64 %s
-// RUN: %clang %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64_be-pc-linux-musleabi \
 // RUN:   | FileCheck --check-prefix=CHECK-MUSL-AARCH64_BE %s
 // CHECK-MUSL-X86:        "-dynamic-linker" "/lib/ld-musl-i386.so.1"
@@ -1771,7 +1771,7 @@
 // CHECK-MUSL-AARCH64_BE: "-dynamic-linker" "/lib/ld-musl-aarch64_be.so.1"
 
 // Check whether multilib gcc install works fine on Gentoo with gcc-config
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux-gnu -rtlib=platform --unwindlib=platform \
 // RUN:     --sysroot=%S/Inputs/gentoo_linux_gcc_multi_version_tree \
 // RUN:     --gcc-toolchain="" \
@@ -1787,7 +1787,7 @@
 // CHECK-LD-GENTOO: "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed"
 // CHECK-LD-GENTOO: "-lc"
 // CHECK-LD-GENTOO: "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed"
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=i686-unknown-linux-gnu -rtlib=platform --unwindlib=platform \
 // RUN:     --sysroot=%S/Inputs/gentoo_linux_gcc_multi_version_tree \
 // RUN:     --gcc-toolchain="" \
@@ -1803,7 +1803,7 @@
 // CHECK-LD-GENTOO-32: "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed"
 // CHECK-LD-GENTOO-32: "-lc"
 // CHECK-LD-GENTOO-32: "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed"
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux-gnux32 -rtlib=platform --unwindlib=platform \
 // RUN:     --sysroot=%S/Inputs/gentoo_linux_gcc_multi_version_tree \
 // RUN:     --gcc-toolchain="" \
@@ -1820,7 +1820,7 @@
 // CHECK-LD-GENTOO-X32: "-lc"
 // CHECK-LD-GENTOO-X32: "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed"
 
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-unknown-linux-gnu \
 // RUN:     --gcc-toolchain="%S/Inputs/rhel_7_tree/opt/rh/devtoolset-7/root/usr" \
 // RUN:     --sysroot=%S/Inputs/rhel_7_tree \
@@ -1831,7 +1831,7 @@
 // CHECK-LD-RHLE7-DTS: [[GCC_INSTALL]/../../../bin/ld
 
 // Check whether gcc7 install works fine on Amazon Linux AMI
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=x86_64-amazon-linux -rtlib=libgcc --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/ami_linux_tree \
@@ -1851,7 +1851,7 @@
 // CHECK-LD-AMI: "-lgcc" "--as-needed" "-lgcc_s" "--no-as-needed"
 
 // Check whether the OpenEmbedded ARM libs are added correctly.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=arm-oe-linux-gnueabi -rtlib=libgcc --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/openembedded_arm_linux_tree \
@@ -1870,7 +1870,7 @@
 // CHECK-OE-ARM: "[[SYSROOT]]/usr/lib/arm-oe-linux-gnueabi/6.3.0/../../../lib{{/|\\\\}}crtn.o"
 
 // Check whether the OpenEmbedded AArch64 libs are added correctly.
-// RUN: %clang -no-canonical-prefixes %s -no-pie -### -o %t.o 2>&1 \
+// RUN: %clang -### %s -no-pie 2>&1 \
 // RUN:     --target=aarch64-oe-linux -rtlib=libgcc --unwindlib=platform \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/openembedded_aarch64_linux_tree \
diff --git a/clang/test/Driver/linux-musl-header-search.cpp b/clang/test/Driver/linux-musl-header-search.cpp
index 9fee036..a904cac 100644
--- a/clang/test/Driver/linux-musl-header-search.cpp
+++ b/clang/test/Driver/linux-musl-header-search.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
-// RUN:     -target x86_64-linux-musl -stdlib=libc++ \
+// RUN: %clang -### %s -fsyntax-only 2>&1 \
+// RUN:     --target=x86_64-linux-musl -stdlib=libc++ \
 // RUN:     -ccc-install-dir %S/Inputs/basic_linux_tree/usr/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libcxx_tree --gcc-toolchain= \
@@ -14,8 +14,8 @@
 // CHECK-X86-64-LIBCXX: "-internal-externc-isystem" "[[SYSROOT]]/usr/include"
 // CHECK-X86-64-LIBCXX: "-internal-isystem" "[[RESOURCE_DIR]]{{/|\\\\}}include"
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only -nobuiltininc 2>&1 \
-// RUN:     -target x86_64-linux-musl \
+// RUN: %clang -### %s -fsyntax-only -nobuiltininc 2>&1 \
+// RUN:     --target=x86_64-linux-musl \
 // RUN:     -ccc-install-dir %S/Inputs/basic_linux_tree/usr/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libcxx_tree --gcc-toolchain= \
@@ -24,8 +24,8 @@
 // CHECK-NOBUILTININC: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-NOBUILTININC-NOT: "-internal-isystem" "[[RESOURCE_DIR]]{{/|\\\\}}include"
 
-// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only -nostdlibinc 2>&1 \
-// RUN:     -target x86_64-linux-musl \
+// RUN: %clang -### %s -fsyntax-only -nostdlibinc 2>&1 \
+// RUN:     --target=x86_64-linux-musl \
 // RUN:     -ccc-install-dir %S/Inputs/basic_linux_tree/usr/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libcxx_tree --gcc-toolchain= \
diff --git a/clang/test/Driver/linux-per-target-runtime-dir.c b/clang/test/Driver/linux-per-target-runtime-dir.c
index 9b23774..0162691 100644
--- a/clang/test/Driver/linux-per-target-runtime-dir.c
+++ b/clang/test/Driver/linux-per-target-runtime-dir.c
@@ -1,4 +1,4 @@
-// RUN: %clangxx -no-canonical-prefixes -x c++ %s -### -o %t.o 2>&1 \
+// RUN: %clangxx -x c++ %s -### -o %t.o 2>&1 \
 // RUN:     --target=x86_64-unknown-linux-gnu \
 // RUN:     -stdlib=libc++ \
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
@@ -6,7 +6,7 @@
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libcxx_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-PER-TARGET-RUNTIME %s
-// CHECK-PER-TARGET-RUNTIME: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK-PER-TARGET-RUNTIME: "-cc1"
 // CHECK-PER-TARGET-RUNTIME: "-resource-dir" "[[RESDIR:[^"]*]]"
 // CHECK-PER-TARGET-RUNTIME: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-PER-TARGET-RUNTIME: "-internal-isystem" "{{.*}}/../include/c++/v1"
diff --git a/clang/test/Driver/mips-cs.cpp b/clang/test/Driver/mips-cs.cpp
index 39f87d8..a585d06 100644
--- a/clang/test/Driver/mips-cs.cpp
+++ b/clang/test/Driver/mips-cs.cpp
@@ -3,7 +3,7 @@
 // Check frontend and linker invocations on Mentor Graphics MIPS toolchain.
 //
 // = Big-endian, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-linux-gnu -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-32 %s
@@ -31,7 +31,7 @@
 // CHECK-BE-HF-32: "[[TC]]/../../../../mips-linux-gnu/libc/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, hard float, uclibc
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-linux-gnu -muclibc -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-UC-HF-32 %s
@@ -60,7 +60,7 @@
 // CHECK-BE-UC-HF-32: "[[TC]]/../../../../mips-linux-gnu/libc/uclibc/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, hard float, mips16
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-linux-gnu -mips16 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-16 %s
@@ -89,7 +89,7 @@
 // CHECK-BE-HF-16: "[[TC]]/../../../../mips-linux-gnu/libc/mips16/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, hard float, mmicromips
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-linux-gnu -mmicromips -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-MICRO %s
@@ -118,7 +118,7 @@
 // CHECK-BE-HF-MICRO: "[[TC]]/../../../../mips-linux-gnu/libc/micromips/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, hard float, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-linux-gnu -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-NAN %s
@@ -147,7 +147,7 @@
 // CHECK-BE-HF-NAN: "[[TC]]/../../../../mips-linux-gnu/libc/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, hard float, uclibc, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-linux-gnu -muclibc -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-UC-HF-NAN %s
@@ -176,7 +176,7 @@
 // CHECK-BE-UC-HF-NAN: "[[TC]]/../../../../mips-linux-gnu/libc/uclibc/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-linux-gnu -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-32 %s
@@ -205,7 +205,7 @@
 // CHECK-BE-SF-32: "[[TC]]/../../../../mips-linux-gnu/libc/soft-float/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, soft float, uclibc
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-linux-gnu -muclibc -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-UC-SF-32 %s
@@ -234,7 +234,7 @@
 // CHECK-BE-UC-SF-32: "[[TC]]/../../../../mips-linux-gnu/libc/uclibc/soft-float/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, soft float, mips16
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-linux-gnu -msoft-float -mips16 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-16 %s
@@ -263,7 +263,7 @@
 // CHECK-BE-SF-16: "[[TC]]/../../../../mips-linux-gnu/libc/mips16/soft-float/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, soft float, micromips
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-linux-gnu -msoft-float -mmicromips -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-MICRO %s
@@ -292,7 +292,7 @@
 // CHECK-BE-SF-MICRO: "[[TC]]/../../../../mips-linux-gnu/libc/micromips/soft-float/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, hard float, 64-bit
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-linux-gnu -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-64 %s
@@ -321,7 +321,7 @@
 // CHECK-BE-HF-64: "[[TC]]/../../../../mips-linux-gnu/libc/usr/lib/../lib64{{/|\\\\}}crtn.o"
 //
 // = Big-endian, soft float, 64-bit
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-linux-gnu -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-64 %s
@@ -350,7 +350,7 @@
 // CHECK-BE-SF-64: "[[TC]]/../../../../mips-linux-gnu/libc/soft-float/usr/lib/../lib64{{/|\\\\}}crtn.o"
 //
 // = Little-endian, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-linux-gnu -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-32 %s
@@ -379,7 +379,7 @@
 // CHECK-EL-HF-32: "[[TC]]/../../../../mips-linux-gnu/libc/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, hard float, uclibc
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-linux-gnu -mhard-float -muclibc -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-UC-HF-32 %s
@@ -408,7 +408,7 @@
 // CHECK-EL-UC-HF-32: "[[TC]]/../../../../mips-linux-gnu/libc/uclibc/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, hard float, mips16
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-linux-gnu -mips16 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-16 %s
@@ -437,7 +437,7 @@
 // CHECK-EL-HF-16: "[[TC]]/../../../../mips-linux-gnu/libc/mips16/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, hard float, micromips
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-linux-gnu -mmicromips -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-MICRO %s
@@ -466,7 +466,7 @@
 // CHECK-EL-HF-MICRO: "[[TC]]/../../../../mips-linux-gnu/libc/micromips/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, hard float, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-linux-gnu -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-NAN %s
@@ -495,7 +495,7 @@
 // CHECK-EL-HF-NAN: "[[TC]]/../../../../mips-linux-gnu/libc/nan2008/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, hard float, uclibc, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-linux-gnu -muclibc -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-UC-HF-NAN %s
@@ -524,7 +524,7 @@
 // CHECK-EL-UC-HF-NAN: "[[TC]]/../../../../mips-linux-gnu/libc/uclibc/nan2008/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-linux-gnu -mfloat-abi=soft -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-32 %s
@@ -553,7 +553,7 @@
 // CHECK-EL-SF-32: "[[TC]]/../../../../mips-linux-gnu/libc/soft-float/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, soft float, uclibc
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-linux-gnu -mfloat-abi=soft -muclibc -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-UC-SF-32 %s
@@ -582,7 +582,7 @@
 // CHECK-EL-UC-SF-32: "[[TC]]/../../../../mips-linux-gnu/libc/uclibc/soft-float/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, soft float, mips16
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-linux-gnu -mips16 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-16 %s
@@ -611,7 +611,7 @@
 // CHECK-EL-SF-16: "[[TC]]/../../../../mips-linux-gnu/libc/mips16/soft-float/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, soft float, micromips
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-linux-gnu -mmicromips -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-MICRO %s
@@ -640,7 +640,7 @@
 // CHECK-EL-SF-MICRO: "[[TC]]/../../../../mips-linux-gnu/libc/micromips/soft-float/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, hard float, 64-bit
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-linux-gnu -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-64 %s
@@ -669,7 +669,7 @@
 // CHECK-EL-HF-64: "[[TC]]/../../../../mips-linux-gnu/libc/el/usr/lib/../lib64{{/|\\\\}}crtn.o"
 //
 // = Little-endian, soft float, 64-bit
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-linux-gnu -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-64 %s
diff --git a/clang/test/Driver/mips-fsf.cpp b/clang/test/Driver/mips-fsf.cpp
index f67ad4e..58c7a1b 100644
--- a/clang/test/Driver/mips-fsf.cpp
+++ b/clang/test/Driver/mips-fsf.cpp
@@ -3,7 +3,7 @@
 // Check frontend and linker invocations on FSF MIPS toolchain.
 //
 // = Big-endian, mips32, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-32 %s
@@ -30,7 +30,7 @@
 // CHECK-BE-HF-32: "[[TC]]/../../../../sysroot/mips32/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32, hard float, fp64
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF64-32 %s
@@ -57,7 +57,7 @@
 // CHECK-BE-HF64-32: "[[TC]]/../../../../sysroot/mips32/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-32 %s
@@ -84,7 +84,7 @@
 // CHECK-BE-SF-32: "[[TC]]/../../../../sysroot/mips32/sof/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips16 / mips32, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32 -mips16 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-16 %s
@@ -111,7 +111,7 @@
 // CHECK-BE-HF-16: "[[TC]]/../../../../sysroot/mips32/mips16/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips16 / mips32, hard float, fp64
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32 -mips16 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF64-16 %s
@@ -138,7 +138,7 @@
 // CHECK-BE-HF64-16: "[[TC]]/../../../../sysroot/mips32/mips16/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips16 / mips32, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32 -mips16 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-16 %s
@@ -165,7 +165,7 @@
 // CHECK-BE-SF-16: "[[TC]]/../../../../sysroot/mips32/mips16/sof/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32 / mips16, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32 -mips16 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN-16 %s
@@ -192,7 +192,7 @@
 // CHECK-BE-NAN-16: "[[TC]]/../../../../sysroot/mips32/mips16/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32 / mips16, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32 -mips16 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN64-16 %s
@@ -219,7 +219,7 @@
 // CHECK-BE-NAN64-16: "[[TC]]/../../../../sysroot/mips32/mips16/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN-32 %s
@@ -246,7 +246,7 @@
 // CHECK-BE-NAN-32: "[[TC]]/../../../../sysroot/mips32/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN64-32 %s
@@ -273,7 +273,7 @@
 // CHECK-BE-NAN64-32: "[[TC]]/../../../../sysroot/mips32/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-32R2 %s
@@ -300,7 +300,7 @@
 // CHECK-BE-HF-32R2: "[[TC]]/../../../../sysroot/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2, hard float, uclibc
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -mhard-float -muclibc -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-UC-HF-32R2 %s
@@ -327,7 +327,7 @@
 // CHECK-BE-UC-HF-32R2: "[[TC]]/../../../../sysroot/uclibc/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF64-32R2 %s
@@ -354,7 +354,7 @@
 // CHECK-BE-HF64-32R2: "[[TC]]/../../../../sysroot/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-32R2 %s
@@ -381,7 +381,7 @@
 // CHECK-BE-SF-32R2: "[[TC]]/../../../../sysroot/sof/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2, soft float, uclibc
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -msoft-float -muclibc -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-UC-SF-32R2 %s
@@ -408,7 +408,7 @@
 // CHECK-BE-UC-SF-32R2: "[[TC]]/../../../../sysroot/uclibc/sof/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2 / mips16, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -mips16 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-16R2 %s
@@ -435,7 +435,7 @@
 // CHECK-BE-HF-16R2: "[[TC]]/../../../../sysroot/mips16/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2 / mips16, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -mips16 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF64-16R2 %s
@@ -462,7 +462,7 @@
 // CHECK-BE-HF64-16R2: "[[TC]]/../../../../sysroot/mips16/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2 / mips16, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -mips16 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-16R2 %s
@@ -489,7 +489,7 @@
 // CHECK-BE-SF-16R2: "[[TC]]/../../../../sysroot/mips16/sof/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2 / mips16, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -mips16 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN-16R2 %s
@@ -516,7 +516,7 @@
 // CHECK-BE-NAN-16R2: "[[TC]]/../../../../sysroot/mips16/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2 / mips16, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -mips16 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN64-16R2 %s
@@ -543,7 +543,7 @@
 // CHECK-BE-NAN64-16R2: "[[TC]]/../../../../sysroot/mips16/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN-32R2 %s
@@ -570,7 +570,7 @@
 // CHECK-BE-NAN-32R2: "[[TC]]/../../../../sysroot/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2, nan2008, uclibc
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -mnan=2008 -muclibc -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-UC-NAN-32R2 %s
@@ -597,7 +597,7 @@
 // CHECK-BE-UC-NAN-32R2: "[[TC]]/../../../../sysroot/uclibc/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r2, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r2 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN64-32R2 %s
@@ -624,7 +624,7 @@
 // CHECK-BE-NAN64-32R2: "[[TC]]/../../../../sysroot/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, default (mips32r2), fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN64-32R2-DEF %s
@@ -651,7 +651,7 @@
 // CHECK-BE-NAN64-32R2-DEF: "[[TC]]/../../../../sysroot/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, micromips, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mmicromips -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-MM %s
@@ -678,7 +678,7 @@
 // CHECK-BE-HF-MM: "[[TC]]/../../../../sysroot/micromips/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, micromips, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mmicromips -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF64-MM %s
@@ -705,7 +705,7 @@
 // CHECK-BE-HF64-MM: "[[TC]]/../../../../sysroot/micromips/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, micromips, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mmicromips -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-MM %s
@@ -732,7 +732,7 @@
 // CHECK-BE-SF-MM: "[[TC]]/../../../../sysroot/micromips/sof/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, micromips, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mmicromips -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN-MM %s
@@ -759,7 +759,7 @@
 // CHECK-BE-NAN-MM: "[[TC]]/../../../../sysroot/micromips/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, micromips, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mmicromips -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN64-MM %s
@@ -786,7 +786,7 @@
 // CHECK-BE-NAN64-MM: "[[TC]]/../../../../sysroot/micromips/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64, ABI n32, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64 -mabi=n32 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-64-N32 %s
@@ -813,7 +813,7 @@
 // CHECK-BE-HF-64-N32: "[[TC]]/../../../../sysroot/mips64/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64, ABI n32, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64 -mabi=n32 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF64-64-N32 %s
@@ -840,7 +840,7 @@
 // CHECK-BE-HF64-64-N32: "[[TC]]/../../../../sysroot/mips64/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64, ABI n32, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64 -mabi=n32 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-64-N32 %s
@@ -867,7 +867,7 @@
 // CHECK-BE-SF-64-N32: "[[TC]]/../../../../sysroot/mips64/sof/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64, ABI n32, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64 -mabi=n32 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN-64-N32 %s
@@ -894,7 +894,7 @@
 // CHECK-BE-NAN-64-N32: "[[TC]]/../../../../sysroot/mips64/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64, ABI n32, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64 -mabi=n32 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN64-64-N32 %s
@@ -921,7 +921,7 @@
 // CHECK-BE-NAN64-64-N32: "[[TC]]/../../../../sysroot/mips64/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64, ABI 64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64 -mabi=64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-64-64 %s
@@ -948,7 +948,7 @@
 // CHECK-BE-HF-64-64: "[[TC]]/../../../../sysroot/mips64/64/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64, ABI 64, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64 -mabi=64 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF64-64-64 %s
@@ -975,7 +975,7 @@
 // CHECK-BE-HF64-64-64: "[[TC]]/../../../../sysroot/mips64/64/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64, ABI 64, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64 -mabi=64 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-64-64 %s
@@ -1002,7 +1002,7 @@
 // CHECK-BE-SF-64-64: "[[TC]]/../../../../sysroot/mips64/64/sof/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64, ABI 64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64 -mabi=64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN-64-64 %s
@@ -1029,7 +1029,7 @@
 // CHECK-BE-NAN-64-64: "[[TC]]/../../../../sysroot/mips64/64/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64, ABI 64, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64 -mabi=64 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN64-64-64 %s
@@ -1056,7 +1056,7 @@
 // CHECK-BE-NAN64-64-64: "[[TC]]/../../../../sysroot/mips64/64/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r2, ABI n32, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64r2 -mabi=n32 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-64R2-N32 %s
@@ -1083,7 +1083,7 @@
 // CHECK-BE-HF-64R2-N32: "[[TC]]/../../../../sysroot/mips64r2/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r2, ABI n32, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64r2 -mabi=n32 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF64-64R2-N32 %s
@@ -1110,7 +1110,7 @@
 // CHECK-BE-HF64-64R2-N32: "[[TC]]/../../../../sysroot/mips64r2/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r2, ABI n32, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64r2 -mabi=n32 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-64R2-N32 %s
@@ -1137,7 +1137,7 @@
 // CHECK-BE-SF-64R2-N32: "[[TC]]/../../../../sysroot/mips64r2/sof/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r2, ABI n32, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64r2 -mabi=n32 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN-64R2-N32 %s
@@ -1164,7 +1164,7 @@
 // CHECK-BE-NAN-64R2-N32: "[[TC]]/../../../../sysroot/mips64r2/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r2, ABI n32, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64r2 -mabi=n32 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN64-64R2-N32 %s
@@ -1191,7 +1191,7 @@
 // CHECK-BE-NAN64-64R2-N32: "[[TC]]/../../../../sysroot/mips64r2/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r2, ABI 64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64r2 -mabi=64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-64R2-64 %s
@@ -1218,7 +1218,7 @@
 // CHECK-BE-HF-64R2-64: "[[TC]]/../../../../sysroot/mips64r2/64/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r2, ABI 64, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64r2 -mabi=64 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF64-64R2-64 %s
@@ -1245,7 +1245,7 @@
 // CHECK-BE-HF64-64R2-64: "[[TC]]/../../../../sysroot/mips64r2/64/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r2, ABI 64, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64r2 -mabi=64 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-SF-64R2-64 %s
@@ -1272,7 +1272,7 @@
 // CHECK-BE-SF-64R2-64: "[[TC]]/../../../../sysroot/mips64r2/64/sof/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r2, ABI 64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64r2 -mabi=64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN-64R2-64 %s
@@ -1299,7 +1299,7 @@
 // CHECK-BE-NAN-64R2-64: "[[TC]]/../../../../sysroot/mips64r2/64/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r2, ABI 64, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64r2 -mabi=64 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN64-64R2-64 %s
@@ -1326,7 +1326,7 @@
 // CHECK-BE-NAN64-64R2-64: "[[TC]]/../../../../sysroot/mips64r2/64/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, default (mips64r2), ABI 64, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mabi=64 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-NAN64-64R2-64-DEF %s
@@ -1353,7 +1353,7 @@
 // CHECK-BE-NAN64-64R2-64-DEF: "[[TC]]/../../../../sysroot/mips64r2/64/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-32 %s
@@ -1380,7 +1380,7 @@
 // CHECK-EL-HF-32: "[[TC]]/../../../../sysroot/mips32/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF64-32 %s
@@ -1407,7 +1407,7 @@
 // CHECK-EL-HF64-32: "[[TC]]/../../../../sysroot/mips32/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-32 %s
@@ -1434,7 +1434,7 @@
 // CHECK-EL-SF-32: "[[TC]]/../../../../sysroot/mips32/el/sof/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32 / mips16, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32 -mips16 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-16 %s
@@ -1461,7 +1461,7 @@
 // CHECK-EL-HF-16: "[[TC]]/../../../../sysroot/mips32/mips16/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32 / mips16, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32 -mips16 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF64-16 %s
@@ -1488,7 +1488,7 @@
 // CHECK-EL-HF64-16: "[[TC]]/../../../../sysroot/mips32/mips16/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32 / mips16, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32 -mips16 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-16 %s
@@ -1515,7 +1515,7 @@
 // CHECK-EL-SF-16: "[[TC]]/../../../../sysroot/mips32/mips16/el/sof/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32 / mips16, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32 -mips16 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN-16 %s
@@ -1542,7 +1542,7 @@
 // CHECK-EL-NAN-16: "[[TC]]/../../../../sysroot/mips32/mips16/el/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32 / mips16, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32 -mips16 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN64-16 %s
@@ -1569,7 +1569,7 @@
 // CHECK-EL-NAN64-16: "[[TC]]/../../../../sysroot/mips32/mips16/el/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN-32 %s
@@ -1596,7 +1596,7 @@
 // CHECK-EL-NAN-32: "[[TC]]/../../../../sysroot/mips32/el/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN64-32 %s
@@ -1623,7 +1623,7 @@
 // CHECK-EL-NAN64-32: "[[TC]]/../../../../sysroot/mips32/el/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-32R2 %s
@@ -1650,7 +1650,7 @@
 // CHECK-EL-HF-32R2: "[[TC]]/../../../../sysroot/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2, hard float, uclibc
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -mhard-float -muclibc -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-UC-HF-32R2 %s
@@ -1677,7 +1677,7 @@
 // CHECK-EL-UC-HF-32R2: "[[TC]]/../../../../sysroot/uclibc/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF64-32R2 %s
@@ -1704,7 +1704,7 @@
 // CHECK-EL-HF64-32R2: "[[TC]]/../../../../sysroot/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-32R2 %s
@@ -1731,7 +1731,7 @@
 // CHECK-EL-SF-32R2: "[[TC]]/../../../../sysroot/el/sof/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2, soft float, uclibc
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -msoft-float -muclibc -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-UC-SF-32R2 %s
@@ -1758,7 +1758,7 @@
 // CHECK-EL-UC-SF-32R2: "[[TC]]/../../../../sysroot/uclibc/el/sof/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2 / mips16, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -mips16 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-16R2 %s
@@ -1785,7 +1785,7 @@
 // CHECK-EL-HF-16R2: "[[TC]]/../../../../sysroot/mips16/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2 / mips16, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -mips16 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF64-16R2 %s
@@ -1812,7 +1812,7 @@
 // CHECK-EL-HF64-16R2: "[[TC]]/../../../../sysroot/mips16/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2 / mips16, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -mips16 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-16R2 %s
@@ -1839,7 +1839,7 @@
 // CHECK-EL-SF-16R2: "[[TC]]/../../../../sysroot/mips16/el/sof/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2 / mips16, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -mips16 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN-16R2 %s
@@ -1866,7 +1866,7 @@
 // CHECK-EL-NAN-16R2: "[[TC]]/../../../../sysroot/mips16/el/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2 / mips16, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -mips16 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN64-16R2 %s
@@ -1893,7 +1893,7 @@
 // CHECK-EL-NAN64-16R2: "[[TC]]/../../../../sysroot/mips16/el/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN-32R2 %s
@@ -1920,7 +1920,7 @@
 // CHECK-EL-NAN-32R2: "[[TC]]/../../../../sysroot/el/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2, nan2008, uclibc
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -mnan=2008 -muclibc -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-UC-NAN-32R2 %s
@@ -1947,7 +1947,7 @@
 // CHECK-EL-UC-NAN-32R2: "[[TC]]/../../../../sysroot/uclibc/el/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r2, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mips32r2 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN64-32R2 %s
@@ -1974,7 +1974,7 @@
 // CHECK-EL-NAN64-32R2: "[[TC]]/../../../../sysroot/el/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, default (mips32r2), fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN64-32R2-DEF %s
@@ -2001,7 +2001,7 @@
 // CHECK-EL-NAN64-32R2-DEF: "[[TC]]/../../../../sysroot/el/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, micromips, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mmicromips -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-MM %s
@@ -2028,7 +2028,7 @@
 // CHECK-EL-HF-MM: "[[TC]]/../../../../sysroot/micromips/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, micromips, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mmicromips -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF64-MM %s
@@ -2055,7 +2055,7 @@
 // CHECK-EL-HF64-MM: "[[TC]]/../../../../sysroot/micromips/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, micromips, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mmicromips -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-MM %s
@@ -2082,7 +2082,7 @@
 // CHECK-EL-SF-MM: "[[TC]]/../../../../sysroot/micromips/el/sof/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, micromips, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mmicromips -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN-MM %s
@@ -2109,7 +2109,7 @@
 // CHECK-EL-NAN-MM: "[[TC]]/../../../../sysroot/micromips/el/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, micromips, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-mti-linux-gnu -mmicromips -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN64-MM %s
@@ -2136,7 +2136,7 @@
 // CHECK-EL-NAN64-MM: "[[TC]]/../../../../sysroot/micromips/el/nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64, ABI n32, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64 -mabi=n32 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-64-N32 %s
@@ -2163,7 +2163,7 @@
 // CHECK-EL-HF-64-N32: "[[TC]]/../../../../sysroot/mips64/el/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64, ABI n32, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64 -mabi=n32 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF64-64-N32 %s
@@ -2190,7 +2190,7 @@
 // CHECK-EL-HF64-64-N32: "[[TC]]/../../../../sysroot/mips64/el/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64, ABI n32, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64 -mabi=n32 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-64-N32 %s
@@ -2217,7 +2217,7 @@
 // CHECK-EL-SF-64-N32: "[[TC]]/../../../../sysroot/mips64/el/sof/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64, ABI n32, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64 -mabi=n32 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN-64-N32 %s
@@ -2244,7 +2244,7 @@
 // CHECK-EL-NAN-64-N32: "[[TC]]/../../../../sysroot/mips64/el/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64, ABI n32, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64 -mabi=n32 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN64-64-N32 %s
@@ -2271,7 +2271,7 @@
 // CHECK-EL-NAN64-64-N32: "[[TC]]/../../../../sysroot/mips64/el/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64, ABI 64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64 -mabi=64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-64-64 %s
@@ -2298,7 +2298,7 @@
 // CHECK-EL-HF-64-64: "[[TC]]/../../../../sysroot/mips64/64/el/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64, ABI 64, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64 -mabi=64 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF64-64-64 %s
@@ -2325,7 +2325,7 @@
 // CHECK-EL-HF64-64-64: "[[TC]]/../../../../sysroot/mips64/64/el/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64, ABI 64, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64 -mabi=64 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-64-64 %s
@@ -2352,7 +2352,7 @@
 // CHECK-EL-SF-64-64: "[[TC]]/../../../../sysroot/mips64/64/el/sof/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64, ABI 64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64 -mabi=64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN-64-64 %s
@@ -2379,7 +2379,7 @@
 // CHECK-EL-NAN-64-64: "[[TC]]/../../../../sysroot/mips64/64/el/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64, ABI 64, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64 -mabi=64 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN64-64-64 %s
@@ -2406,7 +2406,7 @@
 // CHECK-EL-NAN64-64-64: "[[TC]]/../../../../sysroot/mips64/64/el/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64r2, ABI n32, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64r2 -mabi=n32 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-64R2-N32 %s
@@ -2433,7 +2433,7 @@
 // CHECK-EL-HF-64R2-N32: "[[TC]]/../../../../sysroot/mips64r2/el/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64r2, ABI n32, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64r2 -mabi=n32 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF64-64R2-N32 %s
@@ -2460,7 +2460,7 @@
 // CHECK-EL-HF64-64R2-N32: "[[TC]]/../../../../sysroot/mips64r2/el/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64r2, ABI n32, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64r2 -mabi=n32 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-64R2-N32 %s
@@ -2487,7 +2487,7 @@
 // CHECK-EL-SF-64R2-N32: "[[TC]]/../../../../sysroot/mips64r2/el/sof/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64r2, ABI n32, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64r2 -mabi=n32 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN-64R2-N32 %s
@@ -2514,7 +2514,7 @@
 // CHECK-EL-NAN-64R2-N32: "[[TC]]/../../../../sysroot/mips64r2/el/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64r2, ABI n32, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64r2 -mabi=n32 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN64-64R2-N32 %s
@@ -2541,7 +2541,7 @@
 // CHECK-EL-NAN64-64R2-N32: "[[TC]]/../../../../sysroot/mips64r2/el/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64r2, ABI 64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64r2 -mabi=64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF-64R2-64 %s
@@ -2568,7 +2568,7 @@
 // CHECK-EL-HF-64R2-64: "[[TC]]/../../../../sysroot/mips64r2/64/el/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64r2, ABI 64, fp64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64r2 -mabi=64 -mfp64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-HF64-64R2-64 %s
@@ -2595,7 +2595,7 @@
 // CHECK-EL-HF64-64R2-64: "[[TC]]/../../../../sysroot/mips64r2/64/el/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64r2, ABI 64, soft float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64r2 -mabi=64 -msoft-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-SF-64R2-64 %s
@@ -2622,7 +2622,7 @@
 // CHECK-EL-SF-64R2-64: "[[TC]]/../../../../sysroot/mips64r2/64/el/sof/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64r2, ABI 64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64r2 -mabi=64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN-64R2-64 %s
@@ -2649,7 +2649,7 @@
 // CHECK-EL-NAN-64R2-64: "[[TC]]/../../../../sysroot/mips64r2/64/el/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64r2, ABI 64, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mips64r2 -mabi=64 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN64-64R2-64 %s
@@ -2676,7 +2676,7 @@
 // CHECK-EL-NAN64-64R2-64: "[[TC]]/../../../../sysroot/mips64r2/64/el/nan2008/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, default (mips64r2), ABI 64, fp64, nan2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64el-mti-linux-gnu -mabi=64 -mfp64 -mnan=2008 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-EL-NAN64-64R2-64-DEF %s
@@ -2705,7 +2705,7 @@
 // Check that mips32r3 and mips32r5 are equal to mips32r2
 //
 // = Big-endian, mips32r3, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r3 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-32R3 %s
@@ -2732,7 +2732,7 @@
 // CHECK-BE-HF-32R3: "[[TC]]/../../../../sysroot/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips32r5, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux-gnu -mips32r5 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-32R5 %s
@@ -2759,7 +2759,7 @@
 // CHECK-BE-HF-32R5: "[[TC]]/../../../../sysroot/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r3, ABI 64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64r3 -mabi=64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-64R3-64 %s
@@ -2786,7 +2786,7 @@
 // CHECK-BE-HF-64R3-64: "[[TC]]/../../../../sysroot/mips64r2/64/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r5, ABI 64, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-mti-linux-gnu -mips64r5 -mabi=64 -mhard-float -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_fsf_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-HF-64R5-64 %s
diff --git a/clang/test/Driver/mips-img-v2.cpp b/clang/test/Driver/mips-img-v2.cpp
index e031466..837c159 100644
--- a/clang/test/Driver/mips-img-v2.cpp
+++ b/clang/test/Driver/mips-img-v2.cpp
@@ -3,7 +3,7 @@
 // Check frontend and linker invocations on the IMG v2 MIPS toolchain.
 
 // -EB -mips32r6 -mhard-float -mabi=32
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-img-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_img_v2_tree \
 // RUN:        -stdlib=libstdc++ \
@@ -31,7 +31,7 @@
 // EB-HARD-O32: "[[TC]]/../../../../sysroot/mips-r6-hard/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EB -mips64r6 -mhard-float -mabi=n32
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-img-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_img_v2_tree \
 // RUN:        -stdlib=libstdc++ \
@@ -59,7 +59,7 @@
 // EB-HARD-N32: "[[TC]]/../../../../sysroot/mips-r6-hard/usr/lib/../lib32{{/|\\\\}}crtn.o"
 
 // -EB -mips64r6 -mhard-float -mabi=64
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips64-img-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_img_v2_tree \
 // RUN:        -stdlib=libstdc++ \
@@ -87,7 +87,7 @@
 // EB-HARD-N64: "[[TC]]/../../../../sysroot/mips-r6-hard/usr/lib/../lib64{{/|\\\\}}crtn.o"
 
 // -EL -mips32r6 -mhard-float -mabi=32
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-img-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_img_v2_tree \
 // RUN:        -stdlib=libstdc++ \
@@ -115,7 +115,7 @@
 // EL-HARD-O32: "[[TC]]/../../../../sysroot/mipsel-r6-hard/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EL -mips64r6 -mhard-float -mabi=n32
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-img-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_img_v2_tree \
 // RUN:        -stdlib=libstdc++ \
@@ -143,7 +143,7 @@
 // EL-HARD-N32: "[[TC]]/../../../../sysroot/mipsel-r6-hard/usr/lib/../lib32{{/|\\\\}}crtn.o"
 
 // -EL -mips64r6 -mhard-float -mabi=64
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips64-img-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_img_v2_tree \
 // RUN:        -stdlib=libstdc++ \
@@ -171,7 +171,7 @@
 // EL-HARD-N64: "[[TC]]/../../../../sysroot/mipsel-r6-hard/usr/lib/../lib64{{/|\\\\}}crtn.o"
 
 // -EB -mips32r6 -msoft-float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-img-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_img_v2_tree \
 // RUN:        -stdlib=libstdc++ \
@@ -199,7 +199,7 @@
 // EB-SOFT: "[[TC]]/../../../../sysroot/mips-r6-soft/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EL -mips32r6 -msoft-float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-img-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_img_v2_tree \
 // RUN:        -stdlib=libstdc++ \
@@ -227,7 +227,7 @@
 // EL-SOFT: "[[TC]]/../../../../sysroot/mipsel-r6-soft/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EB -mips32r6 -mhard-float -mmicromips
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-img-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_img_v2_tree \
 // RUN:        -stdlib=libstdc++ \
@@ -255,7 +255,7 @@
 // EB-HARD-MICRO: "[[TC]]/../../../../sysroot/micromips-r6-hard/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EB -mips32r6 -msoft-float -mmicromips
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-img-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_img_v2_tree \
 // RUN:        -stdlib=libstdc++ \
@@ -283,7 +283,7 @@
 // EB-SOFT-MICRO: "[[TC]]/../../../../sysroot/micromips-r6-soft/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EL -mips32r6 -mhard-float -mmicromips
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-img-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_img_v2_tree \
 // RUN:        -stdlib=libstdc++ \
@@ -311,7 +311,7 @@
 // EL-HARD-MICRO: "[[TC]]/../../../../sysroot/micromipsel-r6-hard/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EL -mips32r6 -msoft-float -mmicromips
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-img-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_img_v2_tree \
 // RUN:        -stdlib=libstdc++ \
diff --git a/clang/test/Driver/mips-img.cpp b/clang/test/Driver/mips-img.cpp
index 272fa72..3dd3dec0 100644
--- a/clang/test/Driver/mips-img.cpp
+++ b/clang/test/Driver/mips-img.cpp
@@ -3,7 +3,7 @@
 // Check frontend and linker invocations on the IMG MIPS toolchain.
 //
 // = Big-endian, mips32r6
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-img-linux-gnu -mips32r6 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_img_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-32R6 %s
@@ -30,7 +30,7 @@
 // CHECK-BE-32R6: "[[TC]]/../../../../sysroot/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips32r6
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-img-linux-gnu -mips32r6 -EL -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_img_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LE-32R6 %s
@@ -57,7 +57,7 @@
 // CHECK-LE-32R6: "[[TC]]/../../../../sysroot/el/usr/lib/../lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r6, N32
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-img-linux-gnu -mips64r6 -mabi=n32 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_img_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-64R6-N32 %s
@@ -84,7 +84,7 @@
 // CHECK-BE-64R6-N32: "[[TC]]/../../../../sysroot/mips64r6/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64r6, N32
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-img-linux-gnu -mips64r6 -EL -mabi=n32 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_img_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LE-64R6-N32 %s
@@ -111,7 +111,7 @@
 // CHECK-LE-64R6-N32: "[[TC]]/../../../../sysroot/mips64r6/el/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Big-endian, mips64r6, N64
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-img-linux-gnu -mips64r6 -mabi=64 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_img_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-BE-64R6-N64 %s
@@ -138,7 +138,7 @@
 // CHECK-BE-64R6-N64: "[[TC]]/../../../../sysroot/mips64r6/64/usr/lib{{/|\\\\}}crtn.o"
 //
 // = Little-endian, mips64r6, N64
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips64-img-linux-gnu -mips64r6 -EL -mabi=64 -no-pie \
 // RUN:     -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_img_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LE-64R6-N64 %s
diff --git a/clang/test/Driver/mips-mti-linux.c b/clang/test/Driver/mips-mti-linux.c
index 0c5efa5..ebcbff9 100644
--- a/clang/test/Driver/mips-mti-linux.c
+++ b/clang/test/Driver/mips-mti-linux.c
@@ -7,7 +7,7 @@
 // REQUIRES: mips-registered-target
 
 // = Big-endian, mips32r2, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux -mips32r2 -mhard-float -no-pie \
 // RUN:     -rtlib=platform -fuse-ld=ld \
 // RUN:     --sysroot=%S/Inputs/mips_mti_linux/sysroot \
@@ -26,7 +26,7 @@
 // CHECK-BE-HF-32R2-SAME: "[[SYSROOT]]/mips-r2-hard-musl/usr/lib{{/|\\\\}}crtn.o"
 
 // = Little-endian, mips32r2, hard float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-mti-linux -mips32r2 -EL -mhard-float -no-pie \
 // RUN:     -rtlib=platform -fuse-ld=ld \
 // RUN:     --sysroot=%S/Inputs/mips_mti_linux/sysroot \
diff --git a/clang/test/Driver/mips-mti.cpp b/clang/test/Driver/mips-mti.cpp
index 3221c60..05b9b67 100644
--- a/clang/test/Driver/mips-mti.cpp
+++ b/clang/test/Driver/mips-mti.cpp
@@ -1,7 +1,7 @@
 // Check frontend and linker invocations on the MTI MIPS toolchain.
 
 // -EB -mhard-float -mabi=32
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -30,7 +30,7 @@
 // EB-HARD-O32: "[[TC]]/../../../../sysroot/mips-r2-hard/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EB -mhard-float -mabi=n32
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -59,7 +59,7 @@
 // EB-HARD-N32: "[[TC]]/../../../../sysroot/mips-r2-hard/usr/lib/../lib32{{/|\\\\}}crtn.o"
 
 // -EB -mhard-float -mabi=64
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips64-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -88,7 +88,7 @@
 // EB-HARD-N64: "[[TC]]/../../../../sysroot/mips-r2-hard/usr/lib/../lib64{{/|\\\\}}crtn.o"
 
 // -EL -mhard-float -mabi=32
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -117,7 +117,7 @@
 // EL-HARD-O32: "[[TC]]/../../../../sysroot/mipsel-r2-hard/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EL -mhard-float -mabi=n32
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -146,7 +146,7 @@
 // EL-HARD-N32: "[[TC]]/../../../../sysroot/mipsel-r2-hard/usr/lib/../lib32{{/|\\\\}}crtn.o"
 
 // -EL -mhard-float -mabi=64
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips64-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -175,7 +175,7 @@
 // EL-HARD-N64: "[[TC]]/../../../../sysroot/mipsel-r2-hard/usr/lib/../lib64{{/|\\\\}}crtn.o"
 
 // -EB -msoft-float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -204,7 +204,7 @@
 // EB-SOFT: "[[TC]]/../../../../sysroot/mips-r2-soft/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EL -msoft-float
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -233,7 +233,7 @@
 // EL-SOFT: "[[TC]]/../../../../sysroot/mipsel-r2-soft/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EB -mhard-float -muclibc
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -262,7 +262,7 @@
 // EB-HARD-UCLIBC: "[[TC]]/../../../../sysroot/mips-r2-hard-uclibc/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EL -mhard-float -muclibc
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -291,7 +291,7 @@
 // EL-HARD-UCLIBC: "[[TC]]/../../../../sysroot/mipsel-r2-hard-uclibc/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EB -mhard-float -mnan=2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -320,7 +320,7 @@
 // EB-HARD-NAN2008: "[[TC]]/../../../../sysroot/mips-r2-hard-nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EL -mhard-float -mnan=2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -349,7 +349,7 @@
 // EL-HARD-NAN2008: "[[TC]]/../../../../sysroot/mipsel-r2-hard-nan2008/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EB -mhard-float -muclibc -mnan=2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -378,7 +378,7 @@
 // EB-HARD-UCLIBC-NAN2008: "[[TC]]/../../../../sysroot/mips-r2-hard-nan2008-uclibc/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EL -mhard-float -muclibc -mnan=2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -407,7 +407,7 @@
 // EL-HARD-UCLIBC-NAN2008: "[[TC]]/../../../../sysroot/mipsel-r2-hard-nan2008-uclibc/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EL -msoft-float -mmicromips
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
@@ -436,7 +436,7 @@
 // EL-SOFT-MICRO: "[[TC]]/../../../../sysroot/micromipsel-r2-soft/usr/lib/../lib{{/|\\\\}}crtn.o"
 
 // -EL -mhard-float -mmicromips -mnan=2008
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:        --target=mips-mti-linux-gnu \
 // RUN:        --gcc-toolchain=%S/Inputs/mips_mti_tree \
 // RUN:        --sysroot="" \
diff --git a/clang/test/Driver/mips-reduced-toolchain.cpp b/clang/test/Driver/mips-reduced-toolchain.cpp
index a7a6168..54d2695 100644
--- a/clang/test/Driver/mips-reduced-toolchain.cpp
+++ b/clang/test/Driver/mips-reduced-toolchain.cpp
@@ -1,7 +1,7 @@
 // Check frontend and linker invocations on reduced Debian MIPS toolchain.
 // This toolchain icludes O32 ABI only.
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mips-linux-gnu \
 // RUN:     --sysroot=%S/Inputs/debian_reduced_mips_tree \
 // RUN:     --gcc-toolchain="" -no-pie \
@@ -13,7 +13,7 @@
 // CHECK-DEBIAN-MIPS: "-L[[SYSROOT]]/lib"
 // CHECK-DEBIAN-MIPS: "-L[[SYSROOT]]/usr/lib"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -### %s 2>&1 \
 // RUN:     --target=mipsel-linux-gnu \
 // RUN:     --sysroot=%S/Inputs/debian_reduced_mips_tree \
 // RUN:     --gcc-toolchain="" -no-pie \
diff --git a/clang/test/Driver/msp430-mmcu.c b/clang/test/Driver/msp430-mmcu.c
index 5f973a5..b5d370e 100644
--- a/clang/test/Driver/msp430-mmcu.c
+++ b/clang/test/Driver/msp430-mmcu.c
@@ -1,22 +1,22 @@
-// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=msp430c111 2>&1 \
+// RUN: %clang -### %s --target=msp430 -mmcu=msp430c111 2>&1 \
 // RUN:   | FileCheck -check-prefix=MSP430-C111 %s
 
-// MSP430-C111: clang{{.*}} "-cc1" {{.*}} "-D__MSP430C111__"
+// MSP430-C111: "-cc1" {{.*}} "-D__MSP430C111__"
 // MSP430-C111: msp430-elf-ld{{.*}} "-Tmsp430c111.ld"
 
-// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=msp430i2020 2>&1 \
+// RUN: %clang -### %s --target=msp430 -mmcu=msp430i2020 2>&1 \
 // RUN:   | FileCheck -check-prefix=MSP430-I2020 %s
 
-// MSP430-I2020: clang{{.*}} "-cc1" {{.*}} "-D__MSP430i2020__"
+// MSP430-I2020: "-cc1" {{.*}} "-D__MSP430i2020__"
 // MSP430-I2020: msp430-elf-ld{{.*}} "-Tmsp430i2020.ld"
 
-// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=not-a-mcu 2>&1 \
+// RUN: %clang -### %s --target=msp430 -mmcu=not-a-mcu 2>&1 \
 // RUN:   | FileCheck -check-prefix=MSP430-UNSUP %s
 
 // MSP430-UNSUP: error: the clang compiler does not support 'not-a-mcu'
 
 // The generic MCU name "msp430" is not supported.
-// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=msp430 2>&1 \
+// RUN: %clang -### %s --target=msp430 -mmcu=msp430 2>&1 \
 // RUN:   | FileCheck -check-prefix=MSP430 %s
 
 // MSP430: error: the clang compiler does not support 'msp430'
diff --git a/clang/test/Driver/myriad-toolchain.c b/clang/test/Driver/myriad-toolchain.c
index a4bd260..f680420 100644
--- a/clang/test/Driver/myriad-toolchain.c
+++ b/clang/test/Driver/myriad-toolchain.c
@@ -1,4 +1,4 @@
-// RUN: %clang -no-canonical-prefixes -### -target sparc-myriad-rtems %s \
+// RUN: %clang -### --target=sparc-myriad-rtems %s \
 // RUN: -ccc-install-dir %S/Inputs/basic_myriad_tree/bin \
 // RUN: --gcc-toolchain=%S/Inputs/basic_myriad_tree 2>&1 | FileCheck %s -check-prefix=LINK_WITH_RTEMS
 // LINK_WITH_RTEMS: Inputs{{.*}}crti.o
@@ -9,22 +9,22 @@
 // LINK_WITH_RTEMS: Inputs{{.*}}crtend.o
 // LINK_WITH_RTEMS: Inputs{{.*}}crtn.o
 
-// RUN: %clang -c -no-canonical-prefixes -### -target sparc-myriad-rtems -x c++ %s \
+// RUN: %clang -c -### --target=sparc-myriad-rtems -x c++ %s \
 // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/basic_myriad_tree 2>&1 | FileCheck %s -check-prefix=COMPILE_CXX
 // COMPILE_CXX: "-internal-isystem" "{{.*}}/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/../../../../sparc-myriad-rtems/include/c++/6.3.0"
 // COMPILE_CXX: "-internal-isystem" "{{.*}}/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/../../../../sparc-myriad-rtems/include/c++/6.3.0/sparc-myriad-rtems"
 // COMPILE_CXX: "-internal-isystem" "{{.*}}/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/../../../../sparc-myriad-rtems/include/c++/6.3.0/backward"
 
-// RUN: %clang -### -E -target sparc-myriad --sysroot=/yow %s 2>&1 \
+// RUN: %clang -### -E --target=sparc-myriad --sysroot=/yow %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=SLASH_INCLUDE
 // SLASH_INCLUDE: "-isysroot" "/yow" "-internal-isystem" "/yow/include"
 
-// RUN: %clang -### -E -target sparc-myriad --sysroot=/yow %s -nostdinc 2>&1 \
+// RUN: %clang -### -E --target=sparc-myriad --sysroot=/yow %s -nostdinc 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=NO_SLASH_INCLUDE
 // NO_SLASH_INCLUDE: "-isysroot" "/yow"
 // NO_SLASH_INCLUDE-NOT: "-internal-isystem" "/yow/include"
 
-// RUN: %clang -### -target what-myriad %s 2>&1 | FileCheck %s -check-prefix=BAD_ARCH
+// RUN: %clang -### --target=what-myriad %s 2>&1 | FileCheck %s -check-prefix=BAD_ARCH
 // BAD_ARCH: the target architecture 'what' is not supported by the target 'myriad'
 
 // Ensure that '-target shave' picks a different compiler.
@@ -37,22 +37,22 @@
 // As such, we test only for a trailing quote in its rendering.
 // The same goes for "moviAsm".
 
-// RUN: %clang -target shave-myriad -mcpu=myriad2.2 -c -### %s -isystem somewhere -Icommon -Wa,-yippee 2>&1 \
+// RUN: %clang --target=shave-myriad -mcpu=myriad2.2 -c -### %s -isystem somewhere -Icommon -Wa,-yippee 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=MOVICOMPILE
 // MOVICOMPILE: moviCompile{{(.exe)?}}" "-S" "-fno-exceptions" "-DMYRIAD2" "-mcpu=myriad2.2" "-isystem" "somewhere" "-I" "common"
 // MOVICOMPILE: moviAsm{{(.exe)?}}" "-no6thSlotCompression" "-cv:myriad2.2" "-noSPrefixing" "-a"
 // MOVICOMPILE: "-yippee" "-i:somewhere" "-i:common"
 
-// RUN: %clang -target shave-myriad -c -### %s -DEFINE_ME -UNDEFINE_ME 2>&1 \
+// RUN: %clang --target=shave-myriad -c -### %s -DEFINE_ME -UNDEFINE_ME 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=DEFINES
 // DEFINES: "-D" "EFINE_ME" "-U" "NDEFINE_ME"
 
-// RUN: %clang -target shave-myriad -c -### %s -Icommon -iquote quotepath -isystem syspath 2>&1 \
+// RUN: %clang --target=shave-myriad -c -### %s -Icommon -iquote quotepath -isystem syspath 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=INCLUDES
 // INCLUDES: "-iquote" "quotepath" "-isystem" "syspath"
 
 // -fno-split-dwarf-inlining is consumed but not passed to moviCompile.
-// RUN: %clang -target shave-myriad -c -### %s -g -fno-inline-functions \
+// RUN: %clang --target=shave-myriad -c -### %s -g -fno-inline-functions \
 // RUN: -fno-inline-functions-called-once -Os -Wall -MF dep.d -fno-split-dwarf-inlining \
 // RUN: -ffunction-sections -Xclang -xclangflag -mllvm -llvm-flag 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=PASSTHRU_OPTIONS
@@ -60,31 +60,31 @@
 // PASSTHRU_OPTIONS: "-Os" "-Wall" "-MF" "dep.d" "-ffunction-sections"
 // PASSTHRU_OPTIONS: "-Xclang" "-xclangflag" "-mllvm" "-llvm-flag"
 
-// RUN: %clang -target shave-myriad -c %s -o foo.o -### -MD -MF dep.d 2>&1 \
+// RUN: %clang --target=shave-myriad -c %s -o foo.o -### -MD -MF dep.d 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=MDMF
 // MDMF: "-S" "-fno-exceptions" "-DMYRIAD2" "-MD" "-MF" "dep.d" "-MT" "foo.o"
 
-// RUN: %clang -target shave-myriad -std=gnu++11 -mcpu=anothercpu -S %s -o foo.o -### 2>&1 \
+// RUN: %clang --target=shave-myriad -std=gnu++11 -mcpu=anothercpu -S %s -o foo.o -### 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=STDEQ
 // STDEQ: "-S" "-fno-exceptions" "-DMYRIAD2" "-std=gnu++11" "-mcpu=anothercpu"
 
-// RUN: %clang -target shave-myriad -E -Ifoo %s -o foo.i -### 2>&1 \
+// RUN: %clang --target=shave-myriad -E -Ifoo %s -o foo.i -### 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=PREPROCESS
 // PREPROCESS: "-E" "-DMYRIAD2" "-I" "foo"
 
-// RUN: %clang -stdlib=platform -target sparc-myriad -### --driver-mode=g++ %s 2>&1 | FileCheck %s --check-prefix=LIBSTDCXX
+// RUN: %clang -stdlib=platform --target=sparc-myriad -### --driver-mode=g++ %s 2>&1 | FileCheck %s --check-prefix=LIBSTDCXX
 // LIBSTDCXX: "-lstdc++" "-lc" "-lgcc"
 
-// RUN: %clang -stdlib=libc++ -### -target sparcel-myriad -S -x c++ %s 2>&1 | FileCheck %s -check-prefix=LIBCXX
+// RUN: %clang -stdlib=libc++ -### --target=sparcel-myriad -S -x c++ %s 2>&1 | FileCheck %s -check-prefix=LIBCXX
 // LIBCXX: "-internal-isystem" "{{.*}}/../include/c++/v1"
 
-// RUN: %clang -target sparc-myriad -### -nostdlib %s 2>&1 | FileCheck %s --check-prefix=NOSTDLIB
+// RUN: %clang --target=sparc-myriad -### -nostdlib %s 2>&1 | FileCheck %s --check-prefix=NOSTDLIB
 // NOSTDLIB-NOT: crtbegin.o
 // NOSTDLIB-NOT: "-lc"
 
-// RUN: %clang -### -c -g %s -target sparc-myriad 2>&1 | FileCheck -check-prefix=G_SPARC %s
+// RUN: %clang -### -c -g %s --target=sparc-myriad 2>&1 | FileCheck -check-prefix=G_SPARC %s
 // G_SPARC: "-debug-info-kind=constructor" "-dwarf-version=2"
 
-// RUN: %clang -### -c %s -target sparc-myriad-rtems -fuse-init-array 2>&1 \
+// RUN: %clang -### -c %s --target=sparc-myriad-rtems -fuse-init-array 2>&1 \
 // RUN: | FileCheck -check-prefix=USE-INIT-ARRAY %s
 // USE-INIT-ARRAY-NOT: argument unused
diff --git a/clang/test/Driver/native-llvm.c b/clang/test/Driver/native-llvm.c
new file mode 100644
index 0000000..c9543b8
--- /dev/null
+++ b/clang/test/Driver/native-llvm.c
@@ -0,0 +1,5 @@
+// Check that clang reports an error message if -flto without -c is used
+// on a toolchain that is not expecting it (HasNativeLLVMSupport() is false).
+
+// RUN: %clang -### -flto -target x86_64-unknown-unknown %s 2>&1 | FileCheck %s
+// CHECK: error: {{.*}} unable to pass LLVM bit-code files to linker
diff --git a/clang/test/Driver/nostdlib.c b/clang/test/Driver/nostdlib.c
index c9793d9..ffc20ff 100644
--- a/clang/test/Driver/nostdlib.c
+++ b/clang/test/Driver/nostdlib.c
@@ -1,4 +1,4 @@
-// RUN: %clang -target i686-pc-linux-gnu -### -nostdlib %s 2> %t
+// RUN: %clang --target=i686-pc-linux-gnu -### -nostdlib %s 2> %t
 // RUN: FileCheck < %t %s
 //
 // CHECK-NOT: start-group
@@ -6,24 +6,24 @@
 // Most of the toolchains would check for -nostartfiles and -nostdlib
 // in a short-circuiting boolean expression, so if both of the preceding
 // options were present, the second would warn about being unused.
-// RUN: %clang -### -Wno-liblto -nostartfiles -nostdlib -target i386-apple-darwin %s \
+// RUN: %clang -### -Wno-liblto -nostartfiles -nostdlib --target=i386-apple-darwin %s \
 // RUN:   2>&1 | FileCheck %s -check-prefix=ARGSCLAIMED
 // ARGSCLAIMED-NOT: warning:
 
 // In the presence of -nostdlib, the standard libraries should not be
 // passed down to link line
-// RUN: %clang -no-canonical-prefixes %s -### -Wno-liblto -o %t.o 2>&1 \
-// RUN:     -target i686-pc-linux-gnu -nostdlib --rtlib=compiler-rt -fuse-ld=ld \
+// RUN: %clang -### %s -Wno-liblto 2>&1 \
+// RUN:     --target=i686-pc-linux-gnu -nostdlib --rtlib=compiler-rt -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir -lclang_rt.builtins-i686 \
 // RUN:   | FileCheck --check-prefix=CHECK-LINUX-NOSTDLIB %s
 //
-// RUN: %clang -no-canonical-prefixes %s -### -Wno-liblto -o %t.o 2>&1 \
-// RUN:     -target i686-pc-linux-gnu --rtlib=compiler-rt -nostdlib -fuse-ld=ld \
+// RUN: %clang -### %s -Wno-liblto 2>&1 \
+// RUN:     --target=i686-pc-linux-gnu --rtlib=compiler-rt -nostdlib -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir -lclang_rt.builtins-i686 \
 // RUN:   | FileCheck --check-prefix=CHECK-LINUX-NOSTDLIB %s
 //
-// RUN: %clang -target x86_64-pc-windows-msvc -nostdlib --rtlib=compiler-rt -### -Wno-liblto %s 2>&1 | FileCheck %s -check-prefix CHECK-MSVC-NOSTDLIB
-// RUN: %clang -target x86_64-pc-windows-msvc --rtlib=compiler-rt -nostdlib -### -Wno-liblto %s 2>&1 | FileCheck %s -check-prefix CHECK-MSVC-NOSTDLIB
+// RUN: %clang --target=x86_64-pc-windows-msvc -nostdlib --rtlib=compiler-rt -### -Wno-liblto %s 2>&1 | FileCheck %s -check-prefix CHECK-MSVC-NOSTDLIB
+// RUN: %clang --target=x86_64-pc-windows-msvc --rtlib=compiler-rt -nostdlib -### -Wno-liblto %s 2>&1 | FileCheck %s -check-prefix CHECK-MSVC-NOSTDLIB
 //
 // CHECK-LINUX-NOSTDLIB: warning: argument unused during compilation: '--rtlib=compiler-rt'
 // CHECK-LINUX-NOSTDLIB: "{{(.*[^.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
diff --git a/clang/test/Driver/openbsd.c b/clang/test/Driver/openbsd.c
index 5290b48..e3acd2c 100644
--- a/clang/test/Driver/openbsd.c
+++ b/clang/test/Driver/openbsd.c
@@ -1,77 +1,77 @@
 // Check for --eh-frame-hdr being passed with static linking
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -static %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -static -### %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-LD-STATIC-EH %s
-// CHECK-LD-STATIC-EH: clang{{.*}}" "-cc1" "-triple" "i686-pc-openbsd"
+// CHECK-LD-STATIC-EH: "-cc1" "-triple" "i686-pc-openbsd"
 // CHECK-LD-STATIC-EH: ld{{.*}}" "-e" "__start" "--eh-frame-hdr" "-Bstatic" "-o" "a.out" "{{.*}}rcrt0.o" "{{.*}}crtbegin.o" "{{.*}}.o" "-lcompiler_rt" "-lc" "-lcompiler_rt" "{{.*}}crtend.o"
 
 // Check for profiling variants of libraries when linking and -nopie
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -pg -pthread %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -pg -pthread -### %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-PG %s
-// CHECK-PG: clang{{.*}}" "-cc1" "-triple" "i686-pc-openbsd"
+// CHECK-PG: "-cc1" "-triple" "i686-pc-openbsd"
 // CHECK-PG: ld{{.*}}" "-e" "__start" "--eh-frame-hdr" "-Bdynamic" "-dynamic-linker" "{{.*}}ld.so" "-nopie" "-o" "a.out" "{{.*}}gcrt0.o" "{{.*}}crtbegin.o" "{{.*}}.o" "-lcompiler_rt" "-lpthread_p" "-lc_p" "-lcompiler_rt" "{{.*}}crtend.o"
 
 // Check CPU type for i386
-// RUN: %clang -target i386-unknown-openbsd -### -c %s 2>&1 \
+// RUN: %clang --target=i386-unknown-openbsd -### -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-i386-CPU %s
 // CHECK-i386-CPU: "-target-cpu" "i586"
 
 // Check CPU type for MIPS64
-// RUN: %clang -target mips64-unknown-openbsd -### -c %s 2>&1 \
+// RUN: %clang --target=mips64-unknown-openbsd -### -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-MIPS64-CPU %s
-// RUN: %clang -target mips64el-unknown-openbsd -### -c %s 2>&1 \
+// RUN: %clang --target=mips64el-unknown-openbsd -### -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-MIPS64EL-CPU %s
 // CHECK-MIPS64-CPU: "-target-cpu" "mips3"
 // CHECK-MIPS64EL-CPU: "-target-cpu" "mips3"
 
 // Check that the new linker flags are passed to OpenBSD
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -r %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -r -### %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-LD-R %s
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -s %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -s -### %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-LD-S %s
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -t %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -t -### %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-LD-T %s
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -Z %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -Z -### %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-LD-Z %s
-// RUN: %clang -no-canonical-prefixes -target mips64-unknown-openbsd %s -### 2>&1 \
+// RUN: %clang --target=mips64-unknown-openbsd -### %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64-LD %s
-// RUN: %clang -no-canonical-prefixes -target mips64el-unknown-openbsd %s -### 2>&1 \
+// RUN: %clang --target=mips64el-unknown-openbsd -### %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-MIPS64EL-LD %s
 // CHECK-LD-R:     "-r"
 // CHECK-LD-R-NOT: "-l
 // CHECK-LD-R-NOT: crt{{[^./]+}}.o
-// CHECK-LD-S: clang{{.*}}" "-cc1" "-triple" "i686-pc-openbsd"
+// CHECK-LD-S: "-cc1" "-triple" "i686-pc-openbsd"
 // CHECK-LD-S: ld{{.*}}" "-e" "__start" "--eh-frame-hdr" "-Bdynamic" "-dynamic-linker" "{{.*}}ld.so" "-o" "a.out" "{{.*}}crt0.o" "{{.*}}crtbegin.o" "-L{{.*}}" "-s" "{{.*}}.o" "-lcompiler_rt" "-lc" "-lcompiler_rt" "{{.*}}crtend.o"
-// CHECK-LD-T: clang{{.*}}" "-cc1" "-triple" "i686-pc-openbsd"
+// CHECK-LD-T: "-cc1" "-triple" "i686-pc-openbsd"
 // CHECK-LD-T: ld{{.*}}" "-e" "__start" "--eh-frame-hdr" "-Bdynamic" "-dynamic-linker" "{{.*}}ld.so" "-o" "a.out" "{{.*}}crt0.o" "{{.*}}crtbegin.o" "-L{{.*}}" "-t" "{{.*}}.o" "-lcompiler_rt" "-lc" "-lcompiler_rt" "{{.*}}crtend.o"
-// CHECK-LD-Z: clang{{.*}}" "-cc1" "-triple" "i686-pc-openbsd"
+// CHECK-LD-Z: "-cc1" "-triple" "i686-pc-openbsd"
 // CHECK-LD-Z: ld{{.*}}" "-e" "__start" "--eh-frame-hdr" "-Bdynamic" "-dynamic-linker" "{{.*}}ld.so" "-o" "a.out" "{{.*}}crt0.o" "{{.*}}crtbegin.o" "-L{{.*}}" "-Z" "{{.*}}.o" "-lcompiler_rt" "-lc" "-lcompiler_rt" "{{.*}}crtend.o"
-// CHECK-MIPS64-LD: clang{{.*}}" "-cc1" "-triple" "mips64-unknown-openbsd"
+// CHECK-MIPS64-LD: "-cc1" "-triple" "mips64-unknown-openbsd"
 // CHECK-MIPS64-LD: ld{{.*}}" "-EB" "-e" "__start" "--eh-frame-hdr" "-Bdynamic" "-dynamic-linker" "{{.*}}ld.so" "-o" "a.out" "{{.*}}crt0.o" "{{.*}}crtbegin.o" "-L{{.*}}" "{{.*}}.o" "-lcompiler_rt" "-lc" "-lcompiler_rt" "{{.*}}crtend.o"
-// CHECK-MIPS64EL-LD: clang{{.*}}" "-cc1" "-triple" "mips64el-unknown-openbsd"
+// CHECK-MIPS64EL-LD: "-cc1" "-triple" "mips64el-unknown-openbsd"
 // CHECK-MIPS64EL-LD: ld{{.*}}" "-EL" "-e" "__start" "--eh-frame-hdr" "-Bdynamic" "-dynamic-linker" "{{.*}}ld.so" "-o" "a.out" "{{.*}}crt0.o" "{{.*}}crtbegin.o" "-L{{.*}}" "{{.*}}.o" "-lcompiler_rt" "-lc" "-lcompiler_rt" "{{.*}}crtend.o"
 
 // Check that --sysroot is passed to the linker
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -### %s 2>&1 \
 // RUN:   --sysroot=%S/Inputs/basic_openbsd_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LD-SYSROOT %s
 // CHECK-LD-SYSROOT: ld{{.*}}" "--sysroot=[[SYSROOT:[^"]+]]"
 
 // Check passing options to the assembler for various OpenBSD targets
-// RUN: %clang -target amd64-pc-openbsd -m32 -### -no-integrated-as -c %s 2>&1 \
+// RUN: %clang --target=amd64-pc-openbsd -m32 -### -no-integrated-as -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-AMD64-M32 %s
-// RUN: %clang -target arm-unknown-openbsd -### -no-integrated-as -c %s 2>&1 \
+// RUN: %clang --target=arm-unknown-openbsd -### -no-integrated-as -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-ARM %s
-// RUN: %clang -target powerpc-unknown-openbsd -### -no-integrated-as -c %s 2>&1 \
+// RUN: %clang --target=powerpc-unknown-openbsd -### -no-integrated-as -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-POWERPC %s
-// RUN: %clang -target sparc64-unknown-openbsd -### -no-integrated-as -c %s 2>&1 \
+// RUN: %clang --target=sparc64-unknown-openbsd -### -no-integrated-as -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-SPARC64 %s
-// RUN: %clang -target mips64-unknown-openbsd -### -no-integrated-as -c %s 2>&1 \
+// RUN: %clang --target=mips64-unknown-openbsd -### -no-integrated-as -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-MIPS64 %s
-// RUN: %clang -target mips64-unknown-openbsd -fPIC -### -no-integrated-as -c %s 2>&1 \
+// RUN: %clang --target=mips64-unknown-openbsd -fPIC -### -no-integrated-as -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-MIPS64-PIC %s
-// RUN: %clang -target mips64el-unknown-openbsd -### -no-integrated-as -c %s 2>&1 \
+// RUN: %clang --target=mips64el-unknown-openbsd -### -no-integrated-as -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-MIPS64EL %s
-// RUN: %clang -target mips64el-unknown-openbsd -fPIC -### -no-integrated-as -c %s 2>&1 \
+// RUN: %clang --target=mips64el-unknown-openbsd -fPIC -### -no-integrated-as -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-MIPS64EL-PIC %s
 // CHECK-AMD64-M32: as{{.*}}" "--32"
 // CHECK-ARM: as{{.*}}" "-mcpu=cortex-a8"
@@ -83,23 +83,23 @@
 // CHECK-MIPS64EL-PIC: as{{.*}}" "-mabi" "64" "-EL" "-KPIC"
 
 // Check linking against correct startup code when (not) using PIE
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -### %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-PIE %s
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -pie %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -pie -### %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-PIE-FLAG %s
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -fno-pie %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -fno-pie -### %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-PIE %s
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -static %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -static -### %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-STATIC-PIE %s
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -static -fno-pie %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -static -fno-pie -### %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-STATIC-PIE %s
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -nopie %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -nopie -### %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-NOPIE %s
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -fno-pie -nopie %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -fno-pie -nopie -### %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-NOPIE %s
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -static -nopie %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -static -nopie -### %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-NOPIE %s
-// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -fno-pie -static -nopie %s -### 2>&1 \
+// RUN: %clang --target=i686-pc-openbsd -fno-pie -static -nopie -### %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-NOPIE %s
 // CHECK-PIE: "{{.*}}crt0.o"
 // CHECK-PIE-NOT: "-nopie"
@@ -109,20 +109,20 @@
 // CHECK-NOPIE: "-nopie" "{{.*}}crt0.o"
 
 // Check ARM float ABI
-// RUN: %clang -target arm-unknown-openbsd -### -c %s 2>&1 \
+// RUN: %clang --target=arm-unknown-openbsd -### -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-ARM-FLOAT-ABI %s
 // CHECK-ARM-FLOAT-ABI-NOT: "-target-feature" "+soft-float"
 // CHECK-ARM-FLOAT-ABI: "-target-feature" "+soft-float-abi"
 
 // Check PowerPC for Secure PLT
-// RUN: %clang -target powerpc-unknown-openbsd -### -c %s 2>&1 \
+// RUN: %clang --target=powerpc-unknown-openbsd -### -c %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-POWERPC-SECUREPLT %s
 // CHECK-POWERPC-SECUREPLT: "-target-feature" "+secure-plt"
 
 // Check that unwind tables are enabled
-// RUN: %clang -target arm-unknown-openbsd -### -S %s 2>&1 | \
+// RUN: %clang --target=arm-unknown-openbsd -### -S %s 2>&1 | \
 // RUN: FileCheck -check-prefix=NO-UNWIND-TABLES %s
-// RUN: %clang -target mips64-unknown-openbsd -### -S %s 2>&1 | \
+// RUN: %clang --target=mips64-unknown-openbsd -### -S %s 2>&1 | \
 // RUN: FileCheck -check-prefix=UNWIND-TABLES %s
 // UNWIND-TABLES: "-funwind-tables=2"
 // NO-UNWIND-TABLES-NOT: "-funwind-tables=2"
diff --git a/clang/test/Driver/openmp-offload-gpu-new.c b/clang/test/Driver/openmp-offload-gpu-new.c
index 26f12ee..7281ba7 100644
--- a/clang/test/Driver/openmp-offload-gpu-new.c
+++ b/clang/test/Driver/openmp-offload-gpu-new.c
@@ -12,9 +12,9 @@
 // RUN:   | FileCheck %s
 
 // verify the tools invocations
-// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
-// CHECK: clang{{.*}}"-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52"
-// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj"
+// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
+// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52"
+// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj"
 // CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out"
 
 // RUN:   %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \
@@ -41,11 +41,11 @@
 // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
-// CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"
+// CHECK-EMIT-LLVM-IR: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"
 
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvida-cuda -march=sm_70 \
 // RUN:          --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-new-nvptx-test.bc \
-// RUN:          -no-canonical-prefixes -nogpulib %s -o openmp-offload-gpu 2>&1 \
+// RUN:          -nogpulib %s -o openmp-offload-gpu 2>&1 \
 // RUN:   | FileCheck -check-prefix=DRIVER_EMBEDDING %s
 
 // DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],openmp,nvptx64-nvidia-cuda,sm_70
diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c
index 75e7f82..92020b8 100644
--- a/clang/test/Driver/openmp-offload-gpu.c
+++ b/clang/test/Driver/openmp-offload-gpu.c
@@ -12,7 +12,7 @@
 /// ###########################################################################
 
 /// Check -Xopenmp-target uses one of the archs provided when several archs are used.
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
 // RUN:          -fno-openmp-new-driver -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s
 
@@ -22,7 +22,7 @@
 /// ###########################################################################
 
 /// Check -Xopenmp-target -march=sm_35 works as expected when two triples are present.
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fno-openmp-new-driver \
+// RUN:   %clang -### -fopenmp=libomp -fno-openmp-new-driver \
 // RUN:          -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda \
 // RUN:          -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s
@@ -33,11 +33,11 @@
 /// ###########################################################################
 
 /// Check cubin file generation and usage by nvlink
-// RUN:   %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
+// RUN:   %clang -### --target=powerpc64le-unknown-linux-gnu -fopenmp=libomp \
 // RUN:          -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
 /// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
-// RUN:   %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
+// RUN:   %clang -### --target=x86_64-apple-darwin17.0.0 -fopenmp=libomp \
 // RUN:          -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
 
@@ -49,8 +49,8 @@
 
 /// Check unbundlink of assembly file, cubin file generation and usage by nvlink
 // RUN:   touch %t.s
-// RUN:   %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN:          -fno-openmp-new-driver -no-canonical-prefixes -save-temps %t.s 2>&1 \
+// RUN:   %clang -### --target=powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:          -fno-openmp-new-driver -save-temps %t.s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK %s
 
 /// Use DAG to ensure that assembly file has been unbundled.
@@ -62,8 +62,8 @@
 /// ###########################################################################
 
 /// Check cubin file generation and bundling
-// RUN:   %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN:          -fno-openmp-new-driver -no-canonical-prefixes -save-temps %s -c 2>&1 \
+// RUN:   %clang -### --target=powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:          -fno-openmp-new-driver -save-temps %s -c 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-PTXAS-CUBIN-BUNDLING %s
 
 // CHK-PTXAS-CUBIN-BUNDLING: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
@@ -74,8 +74,8 @@
 
 /// Check cubin file unbundling and usage by nvlink
 // RUN:   touch %t.o
-// RUN:   %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN:          -fno-openmp-new-driver -no-canonical-prefixes -save-temps %t.o %S/Inputs/in.so 2>&1 \
+// RUN:   %clang -### --target=powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:          -fno-openmp-new-driver -save-temps %t.o %S/Inputs/in.so 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-CUBIN-UNBUNDLING-NVLINK %s
 
 /// Use DAG to ensure that cubin file has been unbundled.
@@ -90,11 +90,11 @@
 /// Check cubin file generation and usage by nvlink
 // RUN:   touch %t1.o
 // RUN:   touch %t2.o
-// RUN:   %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
+// RUN:   %clang -### --target=powerpc64le-unknown-linux-gnu -fopenmp=libomp \
 // RUN:          -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-TWOCUBIN %s
 /// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
-// RUN:   %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
+// RUN:   %clang -### --target=x86_64-apple-darwin17.0.0 -fopenmp=libomp \
 // RUN:          -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-TWOCUBIN %s
 
@@ -103,7 +103,7 @@
 /// ###########################################################################
 
 /// Check PTXAS is passed -c flag when offloading to an NVIDIA device using OpenMP.
-// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -no-canonical-prefixes %s 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-PTXAS-DEFAULT %s
 
 // CHK-PTXAS-DEFAULT: ptxas{{.*}}" "-c"
@@ -112,7 +112,7 @@
 
 /// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it.
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target \
-// RUN:          -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:          -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-PTXAS-NORELO %s
 
 // CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c"
@@ -122,7 +122,7 @@
 /// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP
 /// Check that the flag is passed when -fopenmp-relocatable-target is used.
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target \
-// RUN:          -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:          -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-PTXAS-RELO %s
 
 // CHK-PTXAS-RELO: ptxas{{.*}}" "-c"
@@ -132,7 +132,7 @@
 /// Check that error is not thrown by toolchain when no cuda lib flag is used.
 /// Check that the flag is passed when -fopenmp-relocatable-target is used.
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 \
-// RUN:   -nocudalib -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:   -nocudalib -fopenmp-relocatable-target -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-FLAG-NOLIBDEVICE %s
 
 // CHK-FLAG-NOLIBDEVICE-NOT: error:{{.*}}sm_60
@@ -142,7 +142,7 @@
 /// Check that error is not thrown by toolchain when no cuda lib device is found when using -S.
 /// Check that the flag is passed when -fopenmp-relocatable-target is used.
 // RUN:   %clang -### -S -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 \
-// RUN:   -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:   -fopenmp-relocatable-target -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-NOLIBDEVICE %s
 
 // CHK-NOLIBDEVICE-NOT: error:{{.*}}sm_60
@@ -154,7 +154,7 @@
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
 // RUN:   --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
 // RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
-// RUN:   -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:   -fopenmp-relocatable-target -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-BCLIB %s
 
 /// Specify the directory containing the bitcode lib, check clang picks the right one
@@ -162,13 +162,13 @@
 // RUN:   --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget \
 // RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:   -fopenmp-relocatable-target -save-temps \
-// RUN:   -no-canonical-prefixes %s 2>&1 | FileCheck -check-prefix=CHK-BCLIB-DIR %s
+// RUN:   %s 2>&1 | FileCheck -check-prefix=CHK-BCLIB-DIR %s
 
 /// Create a bogus bitcode library and find it with LIBRARY_PATH
 // RUN:   env LIBRARY_PATH=%S/Inputs/libomptarget/subdir %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
 // RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:   -fopenmp-relocatable-target -save-temps \
-// RUN:   -no-canonical-prefixes %s 2>&1 | FileCheck -check-prefix=CHK-ENV-BCLIB %s
+// RUN:   %s 2>&1 | FileCheck -check-prefix=CHK-ENV-BCLIB %s
 
 // CHK-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-test.bc
 // CHK-BCLIB-DIR: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget{{/|\\\\}}libomptarget-nvptx-sm_35.bc
@@ -181,7 +181,7 @@
 /// Libomptarget requires sm_35 or newer so an sm_35 bitcode library should never exist.
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
 // RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
-// RUN:   -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:   -fopenmp-relocatable-target -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-BCLIB-WARN %s
 
 // CHK-BCLIB-WARN: no library 'libomptarget-nvptx-sm_35.bc' found in the default clang lib directory or in LIBRARY_PATH; use '--libomptarget-nvptx-bc-path' to specify nvptx bitcode library
@@ -192,7 +192,7 @@
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
 // RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:   --libomptarget-nvptx-bc-path=not-exist.bc \
-// RUN:   -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:   -fopenmp-relocatable-target -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-BCLIB-ERROR %s
 
 // CHK-BCLIB-ERROR: bitcode library 'not-exist.bc' does not exist
@@ -202,23 +202,23 @@
 /// Check that the error is thrown when CUDA 9.1 or lower version is used.
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
 // RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_90/usr/local/cuda \
-// RUN:   -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:   -fopenmp-relocatable-target -save-temps %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-CUDA-VERSION-ERROR %s
 
 // CHK-CUDA-VERSION-ERROR: NVPTX target requires CUDA 9.2 or above; CUDA 9.0 detected
 
 /// Check that debug info is emitted in dwarf-2
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O1 --no-cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O1 --no-cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 2>&1 \
 // RUN:   | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --no-cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --no-cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g0 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g0 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_DEBUG %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb0 -O3 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb0 -O3 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_DEBUG %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-directives-only 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-directives-only 2>&1 \
 // RUN:   | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
 
 // DEBUG_DIRECTIVES-NOT: warning: debug
@@ -235,25 +235,25 @@
 // NO_DEBUG: nvlink
 // NO_DEBUG-NOT: "-g"
 
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --no-cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --no-cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g2 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g2 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb2 -O0 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb2 -O0 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g3 -O3 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g3 -O3 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb3 -O2 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb3 -O2 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-tables-only 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-tables-only 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
-// RUN:   %clang -### -no-canonical-prefixes -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb1 -O2 --cuda-noopt-device-debug 2>&1 \
+// RUN:   %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb1 -O2 --cuda-noopt-device-debug 2>&1 \
 // RUN:   | FileCheck -check-prefix=HAS_DEBUG %s
 
 // HAS_DEBUG-NOT: warning: debug
@@ -268,64 +268,64 @@
 // HAS_DEBUG: nvlink
 // HAS_DEBUG-SAME: "-g"
 
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=CUDA_MODE %s
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=CUDA_MODE %s
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=CUDA_MODE %s
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=CUDA_MODE %s
-// CUDA_MODE: clang{{.*}}"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}"
+// CUDA_MODE: "-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}"
 // CUDA_MODE-SAME: "-fopenmp-cuda-mode"
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_CUDA_MODE %s
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_CUDA_MODE %s
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_CUDA_MODE %s
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_CUDA_MODE %s
 // NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode"
 
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime 2>&1 \
 // RUN:   | FileCheck -check-prefix=FULL_RUNTIME %s
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \
 // RUN:   | FileCheck -check-prefix=FULL_RUNTIME %s
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime 2>&1 \
 // RUN:   | FileCheck -check-prefix=FULL_RUNTIME %s
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \
 // RUN:   | FileCheck -check-prefix=FULL_RUNTIME %s
-// FULL_RUNTIME: clang{{.*}}"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}"
+// FULL_RUNTIME: "-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}"
 // FULL_RUNTIME-SAME: "-fopenmp-cuda-force-full-runtime"
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_FULL_RUNTIME %s
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_FULL_RUNTIME %s
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_FULL_RUNTIME %s
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO_FULL_RUNTIME %s
 // NO_FULL_RUNTIME-NOT: "-{{fno-|f}}openmp-cuda-force-full-runtime"
 
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \
 // RUN:   | FileCheck -check-prefix=CUDA_RED_RECS %s
-// CUDA_RED_RECS: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
+// CUDA_RED_RECS: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
 // CUDA_RED_RECS-SAME: "-fopenmp-cuda-teams-reduction-recs-num=2048"
 
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=OPENMP_NVPTX_WRAPPERS %s
-// OPENMP_NVPTX_WRAPPERS: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
+// OPENMP_NVPTX_WRAPPERS: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
 // OPENMP_NVPTX_WRAPPERS-SAME: "-internal-isystem" "{{.*}}openmp_wrappers"
 
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN:          -save-temps -no-canonical-prefixes -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \
+// RUN:          -save-temps -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \
 // RUN:   | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s
 
 // SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", "[[SAVE_TEMPS_INPUT1]]"]
 
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64 -Xopenmp-target=nvptx64 -march=sm_35 \
-// RUN:          -save-temps -no-canonical-prefixes %s -o openmp-offload-gpu 2>&1 \
+// RUN:          -save-temps %s -o openmp-offload-gpu 2>&1 \
 // RUN:   | FileCheck -check-prefix=TRIPLE %s
 
 // TRIPLE: "-triple" "nvptx64-nvidia-cuda"
diff --git a/clang/test/Driver/ppc-crbits.cpp b/clang/test/Driver/ppc-crbits.cpp
new file mode 100644
index 0000000..3ed5630
--- /dev/null
+++ b/clang/test/Driver/ppc-crbits.cpp
@@ -0,0 +1,112 @@
+// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -mcpu=pwr10 \
+// RUN:   -mcrbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-CRBITS %s
+// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -mcpu=pwr10 \
+// RUN:   -mno-crbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-NOCRBITS %s
+
+// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -mcpu=pwr9 \
+// RUN:   -mcrbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-CRBITS %s
+// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -mcpu=pwr9 \
+// RUN:   -mno-crbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-NOCRBITS %s
+
+// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -mcpu=pwr8 \
+// RUN:   -mcrbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-CRBITS %s
+// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -mcpu=pwr8 \
+// RUN:   -mno-crbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-NOCRBITS %s
+
+// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -mcpu=pwr7 \
+// RUN:   -mcrbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-CRBITS %s
+// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -mcpu=pwr7 \
+// RUN:   -mno-crbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-NOCRBITS %s
+
+// RUN: %clang -target powerpc-ibm-aix %s -### -mcpu=pwr10 \
+// RUN:   -mcrbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-CRBITS %s
+// RUN: %clang -target powerpc-ibm-aix %s -### -mcpu=pwr10 \
+// RUN:   -mno-crbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-NOCRBITS %s
+
+// RUN: %clang -target powerpc-ibm-aix %s -### -mcpu=pwr9 \
+// RUN:   -mcrbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-CRBITS %s
+// RUN: %clang -target powerpc-ibm-aix %s -### -mcpu=pwr9 \
+// RUN:   -mno-crbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-NOCRBITS %s
+
+// RUN: %clang -target powerpc-ibm-aix %s -### -mcpu=pwr8 \
+// RUN:   -mcrbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-CRBITS %s
+// RUN: %clang -target powerpc-ibm-aix %s -### -mcpu=pwr8 \
+// RUN:   -mno-crbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-NOCRBITS %s
+
+// RUN: %clang -target powerpc-ibm-aix %s -### -mcpu=pwr7 \
+// RUN:   -mcrbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-CRBITS %s
+// RUN: %clang -target powerpc-ibm-aix %s -### -mcpu=pwr7 \
+// RUN:   -mno-crbits -o %t.o 2>&1 | FileCheck -check-prefix=CHECK-NOCRBITS %s
+
+
+// CHECK-NOCRBITS: "-target-feature" "-crbits"
+// CHECK-CRBITS: "-target-feature" "+crbits"
+
+
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -emit-llvm \
+// RUN:   -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -mcrbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -mno-crbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
+
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -emit-llvm \
+// RUN:   -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -mcrbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -mno-crbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
+
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -emit-llvm \
+// RUN:   -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mcrbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mno-crbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
+
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr7 -emit-llvm \
+// RUN:   -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr7 -mcrbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr7 -mno-crbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
+
+// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr10 -emit-llvm \
+// RUN:   -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr10 -mcrbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr10 -mno-crbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
+
+// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr9 -emit-llvm \
+// RUN:   -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr9 -mcrbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr9 -mno-crbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
+
+// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr8 -emit-llvm \
+// RUN:   -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr8 -mcrbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr8 -mno-crbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
+
+// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr7 -emit-llvm \
+// RUN:   -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
+// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr7 -mcrbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
+// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr7 -mno-crbits \
+// RUN:   -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
+
+
+// HAS-CRBITS: main(
+// HAS-CRBITS: attributes #0 = {
+// HAS-CRBITS-SAME: +crbits
+// HAS-NOCRBITS: main(
+// HAS-NOCRBITS: attributes #0 = {
+// HAS-NOCRBITS-SAME: -crbits
+
+int main(int argc, char *argv[]) {
+  return 0;
+}
diff --git a/clang/test/Driver/preprocessor.c b/clang/test/Driver/preprocessor.c
index d396142..897b98a 100644
--- a/clang/test/Driver/preprocessor.c
+++ b/clang/test/Driver/preprocessor.c
@@ -8,7 +8,7 @@ A A
 // RUN: %clang -### -E -dD %s 2>&1 | FileCheck --check-prefix=CHECK-dD %s
 // RUN: %clang -### -E -dM %s 2>&1 | FileCheck --check-prefix=CHECK-dM %s
 // RUN: %clang -### -E -dI %s 2>&1 | FileCheck --check-prefix=CHECK-dI %s
-// CHECK-dD: clang{{.*}} "-cc1" {{.*}} "-dD"
-// CHECK-dM: clang{{.*}} "-cc1" {{.*}} "-dM"
-// CHECK-dI: clang{{.*}} "-cc1" {{.*}} "-dI"
+// CHECK-dD: "-cc1" {{.*}} "-dD"
+// CHECK-dM: "-cc1" {{.*}} "-dM"
+// CHECK-dI: "-cc1" {{.*}} "-dI"
 
diff --git a/clang/test/Driver/print-multi-directory.c b/clang/test/Driver/print-multi-directory.c
index 2504c28..46e9ba5 100644
--- a/clang/test/Driver/print-multi-directory.c
+++ b/clang/test/Driver/print-multi-directory.c
@@ -1,5 +1,5 @@
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>/dev/null \
-// RUN:     -target i386-none-linux \
+// RUN: %clang -### %s 2>/dev/null \
+// RUN:     --target=i386-none-linux \
 // RUN:     --sysroot=%S/Inputs/multilib_64bit_linux_tree/usr \
 // RUN:     -print-multi-directory \
 // RUN:   | FileCheck --match-full-lines --check-prefix=CHECK-X86-MULTILIBS %s
@@ -7,8 +7,8 @@
 // CHECK-X86-MULTILIBS:      32
 // CHECK-X86-MULTILIBS-NOT:  {{^.+$}}
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>/dev/null \
-// RUN:     -target i386-none-linux -m64 \
+// RUN: %clang -### %s 2>/dev/null \
+// RUN:     --target=i386-none-linux -m64 \
 // RUN:     --sysroot=%S/Inputs/multilib_64bit_linux_tree/usr \
 // RUN:     -print-multi-directory \
 // RUN:   | FileCheck --match-full-lines --check-prefix=CHECK-X86_64-MULTILIBS %s
@@ -16,8 +16,8 @@
 // CHECK-X86_64-MULTILIBS:      .
 // CHECK-X86_64-MULTILIBS-NOT:  {{^.+$}}
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>/dev/null \
-// RUN:     -target arm-linux-androideabi21 \
+// RUN: %clang -### %s 2>/dev/null \
+// RUN:     --target=arm-linux-androideabi21 \
 // RUN:     -mthumb \
 // RUN:     --gcc-toolchain=%S/Inputs/basic_android_ndk_tree \
 // RUN:     --sysroot=%S/Inputs/basic_android_ndk_tree/sysroot \
diff --git a/clang/test/Driver/ps4-ps5-toolchain.c b/clang/test/Driver/ps4-ps5-toolchain.c
index 308b80c..444e9df 100644
--- a/clang/test/Driver/ps4-ps5-toolchain.c
+++ b/clang/test/Driver/ps4-ps5-toolchain.c
@@ -1,7 +1,13 @@
-// PS4/PS5 miscellaneous toolchain defaults.
+/// PS4/PS5 miscellaneous toolchain behavior.
 
 // RUN: %clang -c %s -### -target x86_64-scei-ps4 2>&1 | FileCheck %s
 // RUN: %clang -c %s -### -target x86_64-sie-ps5 2>&1 | FileCheck %s
 // CHECK-DAG: "-ffunction-sections"
 // CHECK-DAG: "-fdata-sections"
 // CHECK-DAG: "-fdeclspec"
+
+/// Verify LTO is enabled (no diagnostic).
+// RUN: %clang %s -### -target x86_64-scei-ps4 -flto 2>&1 | FileCheck %s --check-prefix=LTO
+// RUN: %clang %s -### -target x86_64-sie-ps5 -flto 2>&1 | FileCheck %s --check-prefix=LTO
+// LTO-NOT: error:
+// LTO-NOT: unable to pass LLVM bit-code
diff --git a/clang/test/Driver/report-stat.c b/clang/test/Driver/report-stat.c
index 3662d0d..6c0c737 100644
--- a/clang/test/Driver/report-stat.c
+++ b/clang/test/Driver/report-stat.c
@@ -1,6 +1,6 @@
-// RUN: %clang -c -fproc-stat-report -fintegrated-as %s | FileCheck %s
+// RUN: %clang -no-canonical-prefixes -c -fproc-stat-report -fintegrated-as %s -o %t.o | FileCheck %s
 // CHECK: clang{{.*}}: output={{.*}}.o, total={{[0-9.]+}} ms, user={{[0-9.]+}} ms, mem={{[0-9]+}} Kb
 
-// RUN: %clang -c -fintegrated-as -fproc-stat-report=%t %s
+// RUN: %clang -no-canonical-prefixes -c -fintegrated-as -fproc-stat-report=%t %s -o %t.o
 // RUN: cat %t | FileCheck --check-prefix=CSV %s
 // CSV: clang{{.*}},"{{.*}}.o",{{[0-9]+}},{{[0-9]+}},{{[0-9]+}}
diff --git a/clang/test/Driver/riscv32-toolchain.c b/clang/test/Driver/riscv32-toolchain.c
index 1e6c8de..1682e12 100644
--- a/clang/test/Driver/riscv32-toolchain.c
+++ b/clang/test/Driver/riscv32-toolchain.c
@@ -1,13 +1,13 @@
 // UNSUPPORTED: system-windows
 // A basic clang -cc1 command-line, and simple environment check.
 
-// RUN: %clang %s -### -no-canonical-prefixes --target=riscv32 \
+// RUN: %clang -### %s --target=riscv32 \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv32_tree 2>&1 \
 // RUN:   | FileCheck -check-prefix=CC1 %s
-// CC1: clang{{.*}} "-cc1" "-triple" "riscv32"
+// CC1: "-cc1" "-triple" "riscv32"
 
 // Test interaction with -fuse-ld=lld, if lld is available.
-// RUN: %clang %s -### --target=riscv32 \
+// RUN: %clang -### %s --target=riscv32 \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv32_tree -fuse-ld=lld 2>&1 \
 // RUN:   | FileCheck -check-prefix=LLD %s
 // LLD: {{(error: invalid linker name in argument '-fuse-ld=lld')|(ld.lld)}}
@@ -15,7 +15,7 @@
 // In the below tests, --rtlib=platform is used so that the driver ignores
 // the configure-time CLANG_DEFAULT_RTLIB option when choosing the runtime lib
 
-// RUN: %clang %s -### -fuse-ld= \
+// RUN: %clang -### %s -fuse-ld= \
 // RUN:   --target=riscv32-unknown-elf --rtlib=platform \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv32_tree \
 // RUN:   --sysroot=%S/Inputs/basic_riscv32_tree/riscv32-unknown-elf 2>&1 \
@@ -30,7 +30,7 @@
 // C-RV32-BAREMETAL-ILP32: "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // C-RV32-BAREMETAL-ILP32: "{{.*}}/Inputs/basic_riscv32_tree/lib/gcc/riscv32-unknown-elf/8.0.1{{/|\\\\}}crtend.o"
 
-// RUN: %clang %s -### -fuse-ld= \
+// RUN: %clang -### %s -fuse-ld= \
 // RUN:   --target=riscv32-unknown-elf --rtlib=platform \
 // RUN:   --sysroot= \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv32_tree 2>&1 \
@@ -44,7 +44,7 @@
 // C-RV32-BAREMETAL-NOSYSROOT-ILP32: "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // C-RV32-BAREMETAL-NOSYSROOT-ILP32: "{{.*}}/Inputs/basic_riscv32_tree/lib/gcc/riscv32-unknown-elf/8.0.1{{/|\\\\}}crtend.o"
 
-// RUN: %clangxx %s -### -fuse-ld= \
+// RUN: %clangxx -### %s -fuse-ld= \
 // RUN:   --target=riscv32-unknown-elf -stdlib=libstdc++ --rtlib=platform \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv32_tree \
 // RUN:   --sysroot=%S/Inputs/basic_riscv32_tree/riscv32-unknown-elf 2>&1 \
@@ -60,7 +60,7 @@
 // CXX-RV32-BAREMETAL-ILP32: "-lstdc++" "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // CXX-RV32-BAREMETAL-ILP32: "{{.*}}/Inputs/basic_riscv32_tree/lib/gcc/riscv32-unknown-elf/8.0.1{{/|\\\\}}crtend.o"
 
-// RUN: %clangxx %s -### -fuse-ld= \
+// RUN: %clangxx -### %s -fuse-ld= \
 // RUN:   --target=riscv32-unknown-elf -stdlib=libstdc++ --rtlib=platform \
 // RUN:   --sysroot= \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv32_tree 2>&1 \
@@ -75,7 +75,7 @@
 // CXX-RV32-BAREMETAL-NOSYSROOT-ILP32: "-lstdc++" "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // CXX-RV32-BAREMETAL-NOSYSROOT-ILP32: "{{.*}}/Inputs/basic_riscv32_tree/lib/gcc/riscv32-unknown-elf/8.0.1{{/|\\\\}}crtend.o"
 
-// RUN: env "PATH=" %clang %s -### -fuse-ld=ld -no-pie \
+// RUN: env "PATH=" %clang -### %s -fuse-ld=ld -no-pie \
 // RUN:   --target=riscv32-unknown-linux-gnu --rtlib=platform -mabi=ilp32 \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_linux_sdk \
 // RUN:   --sysroot=%S/Inputs/multilib_riscv_linux_sdk/sysroot 2>&1 \
@@ -90,7 +90,7 @@
 // C-RV32-LINUX-MULTI-ILP32: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot/lib32/ilp32"
 // C-RV32-LINUX-MULTI-ILP32: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib32/ilp32"
 
-// RUN: env "PATH=" %clang %s -### -fuse-ld=ld -no-pie \
+// RUN: env "PATH=" %clang -### %s -fuse-ld=ld -no-pie \
 // RUN:   --target=riscv32-unknown-linux-gnu --rtlib=platform -march=rv32imafd \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_linux_sdk \
 // RUN:   --sysroot=%S/Inputs/multilib_riscv_linux_sdk/sysroot 2>&1 \
@@ -105,7 +105,7 @@
 // C-RV32-LINUX-MULTI-ILP32D: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot/lib32/ilp32d"
 // C-RV32-LINUX-MULTI-ILP32D: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib32/ilp32d"
 
-// RUN: env "PATH=" %clang %s -### -fuse-ld=ld \
+// RUN: env "PATH=" %clang -### %s -fuse-ld=ld \
 // RUN:   --target=riscv32-unknown-elf --rtlib=platform --sysroot= \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_elf_sdk 2>&1 \
 // RUN:   | FileCheck -check-prefix=C-RV32I-BAREMETAL-MULTI-ILP32 %s
@@ -119,7 +119,7 @@
 // C-RV32I-BAREMETAL-MULTI-ILP32: "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // C-RV32I-BAREMETAL-MULTI-ILP32: "{{.*}}/Inputs/multilib_riscv_elf_sdk/lib/gcc/riscv64-unknown-elf/8.2.0/rv32imac/ilp32{{/|\\\\}}crtend.o"
 
-// RUN: env "PATH=" %clang %s -### -fuse-ld=ld \
+// RUN: env "PATH=" %clang -### %s -fuse-ld=ld \
 // RUN:   --target=riscv32-unknown-elf --rtlib=platform --sysroot= \
 // RUN:   -march=rv32im -mabi=ilp32\
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_elf_sdk 2>&1 \
@@ -134,7 +134,7 @@
 // C-RV32IM-BAREMETAL-MULTI-ILP32: "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // C-RV32IM-BAREMETAL-MULTI-ILP32: "{{.*}}/Inputs/multilib_riscv_elf_sdk/lib/gcc/riscv64-unknown-elf/8.2.0/rv32im/ilp32{{/|\\\\}}crtend.o"
 
-// RUN: env "PATH=" %clang %s -### -fuse-ld=ld \
+// RUN: env "PATH=" %clang -### %s -fuse-ld=ld \
 // RUN:   --target=riscv32-unknown-elf --rtlib=platform --sysroot= \
 // RUN:   -march=rv32iac -mabi=ilp32\
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_elf_sdk 2>&1 \
@@ -149,7 +149,7 @@
 // C-RV32IAC-BAREMETAL-MULTI-ILP32: "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // C-RV32IAC-BAREMETAL-MULTI-ILP32: "{{.*}}/Inputs/multilib_riscv_elf_sdk/lib/gcc/riscv64-unknown-elf/8.2.0/rv32iac/ilp32{{/|\\\\}}crtend.o"
 
-// RUN: env "PATH=" %clang %s -### -fuse-ld=ld \
+// RUN: env "PATH=" %clang -### %s -fuse-ld=ld \
 // RUN:   --target=riscv32-unknown-elf --rtlib=platform --sysroot= \
 // RUN:   -march=rv32imac -mabi=ilp32\
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_elf_sdk 2>&1 \
@@ -164,7 +164,7 @@
 // C-RV32IMAC-BAREMETAL-MULTI-ILP32: "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // C-RV32IMAC-BAREMETAL-MULTI-ILP32: "{{.*}}/Inputs/multilib_riscv_elf_sdk/lib/gcc/riscv64-unknown-elf/8.2.0/rv32imac/ilp32{{/|\\\\}}crtend.o"
 
-// RUN: env "PATH=" %clang %s -### -fuse-ld=ld \
+// RUN: env "PATH=" %clang -### %s -fuse-ld=ld \
 // RUN:   --target=riscv32-unknown-elf --rtlib=platform --sysroot= \
 // RUN:   -march=rv32imafc -mabi=ilp32f \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_elf_sdk 2>&1 \
@@ -180,7 +180,7 @@
 // C-RV32IMAFC-BAREMETAL-MULTI-ILP32F: "{{.*}}/Inputs/multilib_riscv_elf_sdk/lib/gcc/riscv64-unknown-elf/8.2.0/rv32imafc/ilp32f{{/|\\\\}}crtend.o"
 
 // Check that --rtlib can be used to override the used runtime library
-// RUN: %clang %s -### \
+// RUN: %clang -### %s \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_elf_sdk \
 // RUN:   --target=riscv32-unknown-elf --rtlib=libgcc 2>&1 \
 // RUN:   | FileCheck -check-prefix=C-RV32-RTLIB-LIBGCC-ILP32 %s
@@ -189,7 +189,7 @@
 // C-RV32-RTLIB-LIBGCC-ILP32: "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // C-RV32-RTLIB-LIBGCC-ILP32: "{{.*}}crtend.o"
 
-// RUN: %clang %s -### \
+// RUN: %clang -### %s \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_elf_sdk \
 // RUN:   --target=riscv32-unknown-elf --rtlib=compiler-rt 2>&1 \
 // RUN:   | FileCheck -check-prefix=C-RV32-RTLIB-COMPILERRT-ILP32 %s
@@ -198,14 +198,14 @@
 // C-RV32-RTLIB-COMPILERRT-ILP32: "--start-group" "-lc" "-lgloss" "--end-group" "{{.*}}libclang_rt.builtins-riscv32.a"
 // C-RV32-RTLIB-COMPILERRT-ILP32: "{{.*}}clang_rt.crtend-riscv32.o"
 
-// RUN: %clang %s -### --target=riscv32 \
+// RUN: %clang -### %s --target=riscv32 \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv32_tree --sysroot= \
 // RUN:   -resource-dir=%s/Inputs/resource_dir 2>&1 \
 // RUN:   | FileCheck -check-prefix=RESOURCE-INC %s
 // RESOURCE-INC: "-internal-isystem" "{{.*}}/Inputs/resource_dir{{/|\\\\}}include"
 // RESOURCE-INC: "-internal-isystem" "{{.*}}/basic_riscv32_tree{{.*}}riscv32-unknown-elf{{/|\\\\}}include"
 
-// RUN: %clang %s -### --target=riscv32 \
+// RUN: %clang -### %s --target=riscv32 \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv32_tree --sysroot= \
 // RUN:   -resource-dir=%s/Inputs/resource_dir -nobuiltininc 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO-RESOURCE-INC %s
diff --git a/clang/test/Driver/riscv64-toolchain.c b/clang/test/Driver/riscv64-toolchain.c
index 4c7c045..ec37074 100644
--- a/clang/test/Driver/riscv64-toolchain.c
+++ b/clang/test/Driver/riscv64-toolchain.c
@@ -1,13 +1,13 @@
 // UNSUPPORTED: system-windows
 // A basic clang -cc1 command-line, and simple environment check.
 
-// RUN: %clang %s -### -no-canonical-prefixes --target=riscv64 \
+// RUN: %clang -### %s --target=riscv64 \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv64_tree 2>&1 \
 // RUN:   | FileCheck -check-prefix=CC1 %s
-// CC1: clang{{.*}} "-cc1" "-triple" "riscv64"
+// CC1: "-cc1" "-triple" "riscv64"
 
 // Test interaction with -fuse-ld=lld, if lld is available.
-// RUN: %clang %s -### --target=riscv32 -fuse-ld=lld \
+// RUN: %clang -### %s --target=riscv32 -fuse-ld=lld \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv64_tree 2>&1 \
 // RUN:   | FileCheck -check-prefix=LLD %s
 // LLD: {{(error: invalid linker name in argument '-fuse-ld=lld')|(ld.lld)}}
@@ -15,7 +15,7 @@
 // In the below tests, --rtlib=platform is used so that the driver ignores
 // the configure-time CLANG_DEFAULT_RTLIB option when choosing the runtime lib
 
-// RUN: %clang %s -### -fuse-ld= \
+// RUN: %clang -### %s -fuse-ld= \
 // RUN:   --target=riscv64-unknown-elf --rtlib=platform \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv64_tree \
 // RUN:   --sysroot=%S/Inputs/basic_riscv64_tree/riscv64-unknown-elf 2>&1 -no-pie \
@@ -30,7 +30,7 @@
 // C-RV64-BAREMETAL-LP64: "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // C-RV64-BAREMETAL-LP64: "{{.*}}/Inputs/basic_riscv64_tree/lib/gcc/riscv64-unknown-elf/8.0.1{{/|\\\\}}crtend.o"
 
-// RUN: %clang %s -### -fuse-ld= \
+// RUN: %clang -### %s -fuse-ld= \
 // RUN:   --target=riscv64-unknown-elf --rtlib=platform \
 // RUN:   --sysroot= \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv64_tree 2>&1 \
@@ -44,7 +44,7 @@
 // C-RV64-BAREMETAL-NOSYSROOT-LP64: "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // C-RV64-BAREMETAL-NOSYSROOT-LP64: "{{.*}}/Inputs/basic_riscv64_tree/lib/gcc/riscv64-unknown-elf/8.0.1{{/|\\\\}}crtend.o"
 
-// RUN: %clangxx %s -### -fuse-ld= \
+// RUN: %clangxx -### %s -fuse-ld= \
 // RUN:   --target=riscv64-unknown-elf -stdlib=libstdc++ --rtlib=platform \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv64_tree \
 // RUN:   --sysroot=%S/Inputs/basic_riscv64_tree/riscv64-unknown-elf 2>&1 \
@@ -60,7 +60,7 @@
 // CXX-RV64-BAREMETAL-LP64: "-lstdc++" "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // CXX-RV64-BAREMETAL-LP64: "{{.*}}/Inputs/basic_riscv64_tree/lib/gcc/riscv64-unknown-elf/8.0.1{{/|\\\\}}crtend.o"
 
-// RUN: %clangxx %s -### -fuse-ld= \
+// RUN: %clangxx -### %s -fuse-ld= \
 // RUN:   --target=riscv64-unknown-elf -stdlib=libstdc++ --rtlib=platform \
 // RUN:   --sysroot= \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv64_tree 2>&1 \
@@ -75,7 +75,7 @@
 // CXX-RV64-BAREMETAL-NOSYSROOT-LP64: "-lstdc++" "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // CXX-RV64-BAREMETAL-NOSYSROOT-LP64: "{{.*}}/Inputs/basic_riscv64_tree/lib/gcc/riscv64-unknown-elf/8.0.1{{/|\\\\}}crtend.o"
 
-// RUN: %clang %s -### -fuse-ld= -no-pie \
+// RUN: %clang -### %s -fuse-ld= -no-pie \
 // RUN:   --target=riscv64-unknown-linux-gnu --rtlib=platform -mabi=lp64 \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_linux_sdk \
 // RUN:   --sysroot=%S/Inputs/multilib_riscv_linux_sdk/sysroot 2>&1 \
@@ -90,7 +90,7 @@
 // C-RV64-LINUX-MULTI-LP64: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot/lib64/lp64"
 // C-RV64-LINUX-MULTI-LP64: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib64/lp64"
 
-// RUN: %clang %s -### -fuse-ld=ld -no-pie \
+// RUN: %clang -### %s -fuse-ld=ld -no-pie \
 // RUN:   --target=riscv64-unknown-linux-gnu --rtlib=platform -march=rv64imafd \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_linux_sdk \
 // RUN:   --sysroot=%S/Inputs/multilib_riscv_linux_sdk/sysroot 2>&1 \
@@ -105,7 +105,7 @@
 // C-RV64-LINUX-MULTI-LP64D: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot/lib64/lp64d"
 // C-RV64-LINUX-MULTI-LP64D: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib64/lp64d"
 
-// RUN: env "PATH=" %clang %s -### -fuse-ld=ld \
+// RUN: env "PATH=" %clang -### %s -fuse-ld=ld \
 // RUN:   --target=riscv64-unknown-elf --rtlib=platform --sysroot= \
 // RUN:   -march=rv64imac -mabi=lp64\
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_elf_sdk 2>&1 \
@@ -120,7 +120,7 @@
 // C-RV64IMAC-BAREMETAL-MULTI-LP64: "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // C-RV64IMAC-BAREMETAL-MULTI-LP64: "{{.*}}/Inputs/multilib_riscv_elf_sdk/lib/gcc/riscv64-unknown-elf/8.2.0/rv64imac/lp64{{/|\\\\}}crtend.o"
 
-// RUN: env "PATH=" %clang %s -### -fuse-ld=ld \
+// RUN: env "PATH=" %clang -### %s -fuse-ld=ld \
 // RUN:   --target=riscv64-unknown-elf --rtlib=platform --sysroot= \
 // RUN:   -march=rv64imafdc -mabi=lp64d \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_elf_sdk 2>&1 \
@@ -136,7 +136,7 @@
 // C-RV64IMAFDC-BAREMETAL-MULTI-ILP64D: "{{.*}}/Inputs/multilib_riscv_elf_sdk/lib/gcc/riscv64-unknown-elf/8.2.0/rv64imafdc/lp64d{{/|\\\\}}crtend.o"
 
 // Check that --rtlib can be used to override the used runtime library
-// RUN: %clang %s -### \
+// RUN: %clang -### %s \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_elf_sdk \
 // RUN:   --target=riscv64-unknown-elf --rtlib=libgcc 2>&1 \
 // RUN:   | FileCheck -check-prefix=C-RV64-RTLIB-LIBGCC-LP64 %s
@@ -145,7 +145,7 @@
 // C-RV64-RTLIB-LIBGCC-LP64: "--start-group" "-lc" "-lgloss" "--end-group" "-lgcc"
 // C-RV64-RTLIB-LIBGCC-LP64: "{{.*}}crtend.o"
 
-// RUN: %clang %s -### \
+// RUN: %clang -### %s \
 // RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_elf_sdk \
 // RUN:   --target=riscv64-unknown-elf --rtlib=compiler-rt 2>&1 \
 // RUN:   | FileCheck -check-prefix=C-RV64-RTLIB-COMPILERRT-LP64 %s
@@ -154,14 +154,14 @@
 // C-RV64-RTLIB-COMPILERRT-LP64: "--start-group" "-lc" "-lgloss" "--end-group" "{{.*}}libclang_rt.builtins-riscv64.a"
 // C-RV64-RTLIB-COMPILERRT-LP64: "{{.*}}clang_rt.crtend-riscv64.o"
 
-// RUN: %clang %s -### --target=riscv64 \
+// RUN: %clang -### %s --target=riscv64 \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv64_tree --sysroot= \
 // RUN:   -resource-dir=%s/Inputs/resource_dir 2>&1 \
 // RUN:   | FileCheck -check-prefix=RESOURCE-INC %s
 // RESOURCE-INC: "-internal-isystem" "{{.*}}/Inputs/resource_dir{{/|\\\\}}include"
 // RESOURCE-INC: "-internal-isystem" "{{.*}}/basic_riscv64_tree/{{.*}}riscv64-unknown-elf{{/|\\\\}}include"
 
-// RUN: %clang %s -### --target=riscv64 \
+// RUN: %clang -### %s --target=riscv64 \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_riscv64_tree --sysroot= \
 // RUN:   -resource-dir=%s/Inputs/resource_dir -nobuiltininc 2>&1 \
 // RUN:   | FileCheck -check-prefix=NO-RESOURCE-INC %s
diff --git a/clang/test/Driver/sanitizer-ld.c b/clang/test/Driver/sanitizer-ld.c
index def3ef9..ea58a80 100644
--- a/clang/test/Driver/sanitizer-ld.c
+++ b/clang/test/Driver/sanitizer-ld.c
@@ -1,7 +1,7 @@
 // Test sanitizers ld flags.
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld -fsanitize=address \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld -fsanitize=address \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-LINUX %s
@@ -16,16 +16,16 @@
 // CHECK-ASAN-LINUX: "-lrt"
 // CHECK-ASAN-LINUX: "-ldl"
 
-// RUN: %clang -fsanitize=address -fno-sanitize-link-runtime %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=address -fno-sanitize-link-runtime -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-NO-LINK-RUNTIME-LINUX %s
 //
 // CHECK-ASAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.asan-x86_64
 
-// RUN: %clang -fsanitize=address %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=address -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-EXECUTABLE-LINUX %s
@@ -33,8 +33,8 @@
 // CHECK-ASAN-EXECUTABLE-LINUX: libclang_rt.asan_static-x86_64
 // CHECK-ASAN-EXECUTABLE-LINUX: libclang_rt.asan-x86_64
 
-// RUN: %clang -fsanitize=address -shared %s -### -o %t.o 2>&1  \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=address -shared -### %s 2>&1  \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-SHARED-LINUX %s
@@ -42,20 +42,20 @@
 // CHECK-ASAN-SHARED-LINUX: libclang_rt.asan_static-x86_64
 // CHECK-ASAN-SHARED-LINUX-NOT: libclang_rt.asan-x86_64
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libsan \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libsan \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-SHARED-ASAN-LINUX %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libasan \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libasan \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-SHARED-ASAN-LINUX %s
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld -fsanitize=address \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld -fsanitize=address \
 // RUN:     -shared-libsan -static-libsan -shared-libasan             \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -72,8 +72,8 @@
 // CHECK-SHARED-ASAN-LINUX-NOT: "--export-dynamic"
 // CHECK-SHARED-ASAN-LINUX-NOT: "--dynamic-list"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.so -shared 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libsan \
+// RUN: %clang -### %s -o %t.so -shared 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libsan \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-DSO-SHARED-ASAN-LINUX %s
@@ -89,8 +89,8 @@
 // CHECK-DSO-SHARED-ASAN-LINUX-NOT: "--export-dynamic"
 // CHECK-DSO-SHARED-ASAN-LINUX-NOT: "--dynamic-list"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-freebsd -fuse-ld=ld -fsanitize=address \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=i386-unknown-freebsd -fuse-ld=ld -fsanitize=address \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_freebsd_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-FREEBSD %s
@@ -105,8 +105,8 @@
 // CHECK-ASAN-FREEBSD: "-lpthread"
 // CHECK-ASAN-FREEBSD: "-lrt"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-freebsd -fuse-ld=ld -fsanitize=address \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=i386-unknown-freebsd -fuse-ld=ld -fsanitize=address \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_freebsd_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-FREEBSD-LDL %s
@@ -114,8 +114,8 @@
 // CHECK-ASAN-FREEBSD-LDL: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-ASAN-FREEBSD-LDL-NOT: "-ldl"
 
-// RUN: %clangxx -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld -stdlib=platform -fsanitize=address \
+// RUN: %clangxx -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld -stdlib=platform -fsanitize=address \
 // RUN:     -resource-dir=%S/Inputs/empty_resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-LINUX-CXX %s
@@ -131,8 +131,8 @@
 // CHECK-ASAN-LINUX-CXX: "-lrt"
 // CHECK-ASAN-LINUX-CXX: "-ldl"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o /dev/null -fsanitize=address \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld -stdlib=platform \
+// RUN: %clang -### %s -o /dev/null -fsanitize=address \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld -stdlib=platform \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree -lstdc++ -static 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-LINUX-CXX-STATIC %s
@@ -142,8 +142,8 @@
 // CHECK-ASAN-LINUX-CXX-STATIC: "--whole-archive" "{{.*}}libclang_rt.asan-i386.a" "--no-whole-archive"
 // CHECK-ASAN-LINUX-CXX-STATIC: stdc++
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target arm-linux-gnueabi -fuse-ld=ld -fsanitize=address \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=arm-linux-gnueabi -fuse-ld=ld -fsanitize=address \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-ARM %s
 //
@@ -151,8 +151,8 @@
 // CHECK-ASAN-ARM-NOT: "-lc"
 // CHECK-ASAN-ARM: libclang_rt.asan-arm.a"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target armv7l-linux-gnueabi -fuse-ld=ld -fsanitize=address \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=armv7l-linux-gnueabi -fuse-ld=ld -fsanitize=address \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-ARMv7 %s
 //
@@ -160,8 +160,8 @@
 // CHECK-ASAN-ARMv7-NOT: "-lc"
 // CHECK-ASAN-ARMv7: libclang_rt.asan-arm.a"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target arm-linux-androideabi -fuse-ld=ld -fsanitize=address \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=arm-linux-androideabi -fuse-ld=ld -fsanitize=address \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-ANDROID %s
 //
@@ -172,8 +172,8 @@
 // CHECK-ASAN-ANDROID: libclang_rt.asan-arm-android.so"
 // CHECK-ASAN-ANDROID-NOT: "-lpthread"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target arm-linux-androideabi -fuse-ld=ld -fsanitize=address \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=arm-linux-androideabi -fuse-ld=ld -fsanitize=address \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -static-libsan \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-ANDROID-STATICLIBASAN %s
@@ -183,8 +183,8 @@
 // CHECK-ASAN-ANDROID-STATICLIBASAN-NOT: "-lpthread"
 // CHECK-ASAN-ANDROID-STATICLIBASAN-NOT: "-lrt"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target arm-linux-androideabi -fuse-ld=ld -fsanitize=undefined \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=arm-linux-androideabi -fuse-ld=ld -fsanitize=undefined \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-ANDROID %s
 //
@@ -195,8 +195,8 @@
 // CHECK-UBSAN-ANDROID: libclang_rt.ubsan_standalone-arm-android.so"
 // CHECK-UBSAN-ANDROID-NOT: "-lpthread"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target arm-linux-androideabi -fuse-ld=ld -fsanitize=undefined \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=arm-linux-androideabi -fuse-ld=ld -fsanitize=undefined \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -static-libsan \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-ANDROID-STATICLIBASAN %s
@@ -207,8 +207,8 @@
 // CHECK-UBSAN-ANDROID-STATICLIBASAN-NOT: "-lrt"
 
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target i686-linux-android -fuse-ld=ld -fsanitize=address \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=i686-linux-android -fuse-ld=ld -fsanitize=address \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-ANDROID-X86 %s
 //
@@ -219,16 +219,16 @@
 // CHECK-ASAN-ANDROID-X86: libclang_rt.asan-i686-android.so"
 // CHECK-ASAN-ANDROID-X86-NOT: "-lpthread"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target arm-linux-androideabi -fsanitize=address \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=arm-linux-androideabi -fsanitize=address \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared-libsan \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-ANDROID-SHARED-LIBASAN %s
 //
 // CHECK-ASAN-ANDROID-SHARED-LIBASAN-NOT: argument unused during compilation: '-shared-libsan'
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target arm-linux-androideabi -fuse-ld=ld -fsanitize=address \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=arm-linux-androideabi -fuse-ld=ld -fsanitize=address \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-ANDROID-SHARED %s
@@ -238,8 +238,8 @@
 // CHECK-ASAN-ANDROID-SHARED: libclang_rt.asan-arm-android.so"
 // CHECK-ASAN-ANDROID-SHARED-NOT: "-lpthread"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target sparcel-myriad-rtems-elf -fuse-ld=ld -fsanitize=address \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=sparcel-myriad-rtems-elf -fuse-ld=ld -fsanitize=address \
 // RUN:     --sysroot=%S/Inputs/basic_myriad_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-MYRIAD %s
 //
@@ -247,8 +247,8 @@
 // CHECK-ASAN-MYRIAD-NOT: "-lc"
 // CHECK-ASAN-MYRIAD: libclang_rt.asan-sparcel.a"
 
-// RUN: %clangxx -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld -stdlib=platform -lstdc++ \
+// RUN: %clangxx -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld -stdlib=platform -lstdc++ \
 // RUN:     -fsanitize=thread \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -266,16 +266,16 @@
 // CHECK-TSAN-LINUX-CXX: "-lrt"
 // CHECK-TSAN-LINUX-CXX: "-ldl"
 
-// RUN: %clang -fsanitize=thread -fno-sanitize-link-runtime %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=thread -fno-sanitize-link-runtime -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-TSAN-NO-LINK-RUNTIME-LINUX %s
 //
 // CHECK-TSAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.tsan
 
-// RUN: %clangxx -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld -stdlib=platform -lstdc++ \
+// RUN: %clangxx -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld -stdlib=platform -lstdc++ \
 // RUN:     -fsanitize=memory \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -293,28 +293,28 @@
 // CHECK-MSAN-LINUX-CXX: "-lrt"
 // CHECK-MSAN-LINUX-CXX: "-ldl"
 
-// RUN: %clang -fsanitize=memory -fno-sanitize-link-runtime %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=memory -fno-sanitize-link-runtime -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-MSAN-NO-LINK-RUNTIME-LINUX %s
 //
 // CHECK-MSAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.msan
 
-// RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnux32 -fuse-ld=ld \
+// RUN: %clang -fsanitize=undefined -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnux32 -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/multilib_64bit_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-LINUX %s
 
-// RUN: %clang -fsanitize=float-divide-by-zero %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnux32 -fuse-ld=ld \
+// RUN: %clang -fsanitize=float-divide-by-zero -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnux32 -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/multilib_64bit_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-LINUX %s
 
-// RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux-gnux32 -fuse-ld=ld \
+// RUN: %clang -fsanitize=undefined -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux-gnux32 -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/multilib_64bit_linux_tree \
 // RUN:     -static-libsan \
@@ -329,30 +329,30 @@
 // CHECK-UBSAN-LINUX-NOT: "-lstdc++"
 // CHECK-UBSAN-LINUX: "-lpthread"
 
-// RUN: %clang -fsanitize=undefined -fno-sanitize-link-runtime %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=undefined -fno-sanitize-link-runtime -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-NO-LINK-RUNTIME-LINUX %s
 //
 // CHECK-UBSAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.undefined
 
-// RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=undefined -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:     -shared-libsan \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-LINUX-SHAREDLIBASAN %s
 
-// RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=undefined -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:     -static-libsan -shared-libsan \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-LINUX-SHAREDLIBASAN %s
 
-// RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=undefined -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:     -shared -shared-libsan \
@@ -361,8 +361,8 @@
 // CHECK-UBSAN-LINUX-SHAREDLIBASAN: "{{.*}}ld{{(.exe)?}}"
 // CHECK-UBSAN-LINUX-SHAREDLIBASAN: "{{.*}}libclang_rt.ubsan_standalone-i386.so{{.*}}"
 
-// RUN: %clang -fsanitize=undefined -fsanitize-link-c++-runtime %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux \
+// RUN: %clang -fsanitize=undefined -fsanitize-link-c++-runtime -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-LINUX-LINK-CXX %s
@@ -370,8 +370,8 @@
 // CHECK-UBSAN-LINUX-LINK-CXX: "--whole-archive" "{{.*}}libclang_rt.ubsan_standalone_cxx-i386.a" "--no-whole-archive"
 // CHECK-UBSAN-LINUX-LINK-CXX-NOT: "-lstdc++"
 
-// RUN: %clangxx -fsanitize=undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld -stdlib=platform \
+// RUN: %clangxx -fsanitize=undefined -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld -stdlib=platform \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-LINUX-CXX %s
@@ -385,8 +385,8 @@
 // CHECK-UBSAN-LINUX-CXX-NOT: libclang_rt.asan
 // CHECK-UBSAN-LINUX-CXX: "-lpthread"
 
-// RUN: %clang -fsanitize=undefined -fsanitize-minimal-runtime %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=undefined -fsanitize-minimal-runtime -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-MINIMAL-LINUX %s
@@ -394,22 +394,22 @@
 // CHECK-UBSAN-MINIMAL-LINUX: "--whole-archive" "{{.*}}libclang_rt.ubsan_minimal-i386.a" "--no-whole-archive"
 // CHECK-UBSAN-MINIMAL-LINUX: "-lpthread"
 
-// RUN: %clang -fsanitize=undefined -fsanitize-minimal-runtime %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-apple-darwin -fuse-ld=ld \
+// RUN: %clang -fsanitize=undefined -fsanitize-minimal-runtime -### %s 2>&1 \
+// RUN:     --target=x86_64-apple-darwin -fuse-ld=ld \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-MINIMAL-DARWIN %s
 // CHECK-UBSAN-MINIMAL-DARWIN: "{{.*}}ld{{(.exe)?}}"
 // CHECK-UBSAN-MINIMAL-DARWIN: "{{.*}}libclang_rt.ubsan_minimal_osx_dynamic.dylib"
 
-// RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-apple-darwin -fuse-ld=ld -static-libsan \
+// RUN: %clang -fsanitize=undefined -### %s 2>&1 \
+// RUN:     --target=x86_64-apple-darwin -fuse-ld=ld -static-libsan \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-STATIC-DARWIN %s
 // CHECK-UBSAN-STATIC-DARWIN: "{{.*}}ld{{(.exe)?}}"
 // CHECK-UBSAN-STATIC-DARWIN: "{{.*}}libclang_rt.ubsan_osx.a"
 
-// RUN: %clang -fsanitize=address,undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=address,undefined -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-UBSAN-LINUX %s
@@ -419,8 +419,8 @@
 // CHECK-ASAN-UBSAN-LINUX-NOT: "-lstdc++"
 // CHECK-ASAN-UBSAN-LINUX: "-lpthread"
 
-// RUN: %clangxx -fsanitize=address,undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld -stdlib=platform \
+// RUN: %clangxx -fsanitize=address,undefined -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld -stdlib=platform \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-UBSAN-LINUX-CXX %s
@@ -431,8 +431,8 @@
 // CHECK-ASAN-UBSAN-LINUX-CXX: "-lstdc++"
 // CHECK-ASAN-UBSAN-LINUX-CXX: "-lpthread"
 
-// RUN: %clangxx -fsanitize=memory,undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clangxx -fsanitize=memory,undefined -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-MSAN-UBSAN-LINUX-CXX %s
@@ -440,8 +440,8 @@
 // CHECK-MSAN-UBSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.msan-x86_64.a" "--no-whole-archive"
 // CHECK-MSAN-UBSAN-LINUX-CXX-NOT: libclang_rt.ubsan
 
-// RUN: %clangxx -fsanitize=thread,undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clangxx -fsanitize=thread,undefined -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-TSAN-UBSAN-LINUX-CXX %s
@@ -449,8 +449,8 @@
 // CHECK-TSAN-UBSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.tsan-x86_64.a" "--no-whole-archive"
 // CHECK-TSAN-UBSAN-LINUX-CXX-NOT: libclang_rt.ubsan
 
-// RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=undefined -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:     -shared \
@@ -460,8 +460,8 @@
 // CHECK-UBSAN-LINUX-SHARED-NOT: --dynamic-list
 // CHECK-UBSAN-LINUX-SHARED-NOT: libclang_rt.ubsan
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld -fsanitize=leak \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld -fsanitize=leak \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LSAN-LINUX %s
@@ -473,16 +473,16 @@
 // CHECK-LSAN-LINUX: "-lpthread"
 // CHECK-LSAN-LINUX: "-ldl"
 
-// RUN: %clang -fsanitize=leak -fno-sanitize-link-runtime %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=leak -fno-sanitize-link-runtime -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LSAN-NO-LINK-RUNTIME-LINUX %s
 //
 // CHECK-LSAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.lsan
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:  -target x86_64-unknown-linux -fuse-ld=ld -fsanitize=leak -fsanitize-coverage=func \
+// RUN: %clang -### %s 2>&1 \
+// RUN:  --target=x86_64-unknown-linux -fuse-ld=ld -fsanitize=leak -fsanitize-coverage=func \
 // RUN:  -resource-dir=%S/Inputs/resource_dir \
 // RUN:  --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LSAN-COV-LINUX %s
@@ -495,8 +495,8 @@
 // CHECK-LSAN-COV-LINUX: "-lpthread"
 // CHECK-LSAN-COV-LINUX: "-ldl"
 
-// RUN: %clang -fsanitize=leak,address %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=leak,address -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LSAN-ASAN-LINUX %s
@@ -505,8 +505,8 @@
 // CHECK-LSAN-ASAN-LINUX: libclang_rt.asan-x86_64
 // CHECK-LSAN-ASAN-LINUX-NOT: libclang_rt.lsan
 
-// RUN: %clang -fsanitize=address -fsanitize-coverage=func %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=address -fsanitize-coverage=func -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-COV-LINUX %s
@@ -516,8 +516,8 @@
 // CHECK-ASAN-COV-LINUX-NOT: "-lstdc++"
 // CHECK-ASAN-COV-LINUX: "-lpthread"
 
-// RUN: %clang -fsanitize=memory -fsanitize-coverage=func %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=memory -fsanitize-coverage=func -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-MSAN-COV-LINUX %s
@@ -527,8 +527,8 @@
 // CHECK-MSAN-COV-LINUX-NOT: "-lstdc++"
 // CHECK-MSAN-COV-LINUX: "-lpthread"
 
-// RUN: %clang -fsanitize=dataflow -fsanitize-coverage=func %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=dataflow -fsanitize-coverage=func -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-DFSAN-COV-LINUX %s
@@ -538,8 +538,8 @@
 // CHECK-DFSAN-COV-LINUX-NOT: "-lstdc++"
 // CHECK-DFSAN-COV-LINUX: "-lpthread"
 
-// RUN: %clang -fsanitize=undefined -fsanitize-coverage=func %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=undefined -fsanitize-coverage=func -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-COV-LINUX %s
@@ -548,8 +548,8 @@
 // CHECK-UBSAN-COV-LINUX-NOT: "-lstdc++"
 // CHECK-UBSAN-COV-LINUX: "-lpthread"
 
-// RUN: %clang -fsanitize-coverage=func %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize-coverage=func -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-COV-LINUX %s
@@ -559,8 +559,8 @@
 // CHECK-COV-LINUX: "-lpthread"
 
 // CFI by itself does not link runtime libraries.
-// RUN: %clang -fsanitize=cfi %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld -rtlib=platform \
+// RUN: %clang -fsanitize=cfi -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld -rtlib=platform \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-LINUX %s
@@ -569,8 +569,8 @@
 
 // CFI with diagnostics links the UBSan runtime.
 // RUN: %clang -fsanitize=cfi -fno-sanitize-trap=cfi -fsanitize-recover=cfi \
-// RUN:     %s -### -o %t.o 2>&1\
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN:     -### %s 2>&1\
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-DIAG-LINUX %s
@@ -578,8 +578,8 @@
 // CHECK-CFI-DIAG-LINUX: "--whole-archive" "{{[^"]*}}libclang_rt.ubsan_standalone-x86_64.a" "--no-whole-archive"
 
 // Cross-DSO CFI links the CFI runtime.
-// RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-CROSS-DSO-LINUX %s
@@ -588,9 +588,9 @@
 // CHECK-CFI-CROSS-DSO-LINUX: -export-dynamic
 
 // Cross-DSO CFI with diagnostics links just the CFI runtime.
-// RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso %s -### -o %t.o 2>&1 \
+// RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso -### %s 2>&1 \
 // RUN:     -fno-sanitize-trap=cfi -fsanitize-recover=cfi \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-CROSS-DSO-DIAG-LINUX %s
@@ -599,8 +599,8 @@
 // CHECK-CFI-CROSS-DSO-DIAG-LINUX: -export-dynamic
 
 // Cross-DSO CFI on Android does not link runtime libraries.
-// RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso %s -### -o %t.o 2>&1 \
-// RUN:     -target aarch64-linux-android -fuse-ld=ld \
+// RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso -### %s 2>&1 \
+// RUN:     --target=aarch64-linux-android -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-CROSS-DSO-ANDROID %s
@@ -608,9 +608,9 @@
 // CHECK-CFI-CROSS-DSO-ANDROID-NOT: libclang_rt.cfi
 
 // Cross-DSO CFI with diagnostics on Android links just the UBSAN runtime.
-// RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso %s -### -o %t.o 2>&1 \
+// RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso -### %s 2>&1 \
 // RUN:     -fno-sanitize-trap=cfi -fsanitize-recover=cfi \
-// RUN:     -target aarch64-linux-android -fuse-ld=ld \
+// RUN:     --target=aarch64-linux-android -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-CROSS-DSO-DIAG-ANDROID %s
@@ -618,9 +618,9 @@
 // CHECK-CFI-CROSS-DSO-DIAG-ANDROID: "{{[^"]*}}libclang_rt.ubsan_standalone-aarch64-android.so"
 // CHECK-CFI-CROSS-DSO-DIAG-ANDROID: "--export-dynamic-symbol=__cfi_check"
 
-// RUN: %clangxx -fsanitize=address %s -### -o %t.o 2>&1 \
+// RUN: %clangxx -fsanitize=address -### %s 2>&1 \
 // RUN:     -mmacosx-version-min=10.6 \
-// RUN:     -target x86_64-apple-darwin13.4.0 -fuse-ld=ld -stdlib=platform \
+// RUN:     --target=x86_64-apple-darwin13.4.0 -fuse-ld=ld -stdlib=platform \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-DARWIN106-CXX %s
@@ -628,9 +628,9 @@
 // CHECK-ASAN-DARWIN106-CXX: libclang_rt.asan_osx_dynamic.dylib
 // CHECK-ASAN-DARWIN106-CXX-NOT: -lc++abi
 
-// RUN: %clangxx -fsanitize=leak %s -### -o %t.o 2>&1 \
+// RUN: %clangxx -fsanitize=leak -### %s 2>&1 \
 // RUN:     -mmacosx-version-min=10.6 \
-// RUN:     -target x86_64-apple-darwin13.4.0 -fuse-ld=ld -stdlib=platform \
+// RUN:     --target=x86_64-apple-darwin13.4.0 -fuse-ld=ld -stdlib=platform \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LSAN-DARWIN106-CXX %s
@@ -638,8 +638,8 @@
 // CHECK-LSAN-DARWIN106-CXX: libclang_rt.lsan_osx_dynamic.dylib
 // CHECK-LSAN-DARWIN106-CXX-NOT: -lc++abi
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld -fsanitize=safe-stack \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld -fsanitize=safe-stack \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-SAFESTACK-LINUX %s
@@ -652,49 +652,49 @@
 // CHECK-SAFESTACK-LINUX: "-lpthread"
 // CHECK-SAFESTACK-LINUX: "-ldl"
 
-// RUN: %clang -fsanitize=shadow-call-stack %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=shadow-call-stack -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:   | FileCheck --check-prefix=CHECK-SHADOWCALLSTACK-LINUX-X86-64 %s
 // CHECK-SHADOWCALLSTACK-LINUX-X86-64-NOT: error:
 
-// RUN: %clang -fsanitize=shadow-call-stack %s -### -o %t.o 2>&1 \
-// RUN:     -target aarch64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=shadow-call-stack -### %s 2>&1 \
+// RUN:     --target=aarch64-unknown-linux -fuse-ld=ld \
 // RUN:   | FileCheck --check-prefix=CHECK-SHADOWCALLSTACK-LINUX-AARCH64 %s
 // CHECK-SHADOWCALLSTACK-LINUX-AARCH64: '-fsanitize=shadow-call-stack' only allowed with '-ffixed-x18'
 
-// RUN: %clang -fsanitize=shadow-call-stack %s -### -o %t.o 2>&1 \
-// RUN:     -target riscv32-unknown-elf -fuse-ld=ld \
+// RUN: %clang -fsanitize=shadow-call-stack -### %s 2>&1 \
+// RUN:     --target=riscv32-unknown-elf -fuse-ld=ld \
 // RUN:   | FileCheck --check-prefix=CHECK-SHADOWCALLSTACK-ELF-RISCV32 %s
 // CHECK-SHADOWCALLSTACK-ELF-RISCV32: '-fsanitize=shadow-call-stack' only allowed with '-ffixed-x18'
 
-// RUN: %clang -fsanitize=shadow-call-stack %s -### -o %t.o 2>&1 \
-// RUN:     -target riscv64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=shadow-call-stack -### %s 2>&1 \
+// RUN:     --target=riscv64-unknown-linux -fuse-ld=ld \
 // RUN:   | FileCheck --check-prefix=CHECK-SHADOWCALLSTACK-LINUX-RISCV64 %s
 // CHECK-SHADOWCALLSTACK-LINUX-RISCV64: '-fsanitize=shadow-call-stack' only allowed with '-ffixed-x18'
 
-// RUN: %clang -fsanitize=shadow-call-stack %s -### -o %t.o 2>&1 \
-// RUN:     -target aarch64-unknown-linux -fuse-ld=ld -ffixed-x18 \
+// RUN: %clang -fsanitize=shadow-call-stack -### %s 2>&1 \
+// RUN:     --target=aarch64-unknown-linux -fuse-ld=ld -ffixed-x18 \
 // RUN:   | FileCheck --check-prefix=CHECK-SHADOWCALLSTACK-LINUX-AARCH64-X18 %s
-// RUN: %clang -fsanitize=shadow-call-stack %s -### -o %t.o 2>&1 \
-// RUN:     -target arm64-unknown-ios -fuse-ld=ld \
+// RUN: %clang -fsanitize=shadow-call-stack -### %s 2>&1 \
+// RUN:     --target=arm64-unknown-ios -fuse-ld=ld \
 // RUN:   | FileCheck --check-prefix=CHECK-SHADOWCALLSTACK-LINUX-AARCH64-X18 %s
-// RUN: %clang -fsanitize=shadow-call-stack %s -### -o %t.o 2>&1 \
-// RUN:     -target aarch64-unknown-linux-android -fuse-ld=ld \
+// RUN: %clang -fsanitize=shadow-call-stack -### %s 2>&1 \
+// RUN:     --target=aarch64-unknown-linux-android -fuse-ld=ld \
 // RUN:   | FileCheck --check-prefix=CHECK-SHADOWCALLSTACK-LINUX-AARCH64-X18 %s
 // CHECK-SHADOWCALLSTACK-LINUX-AARCH64-X18-NOT: error:
 
-// RUN: %clang -fsanitize=shadow-call-stack %s -### -o %t.o 2>&1 \
-// RUN:     -target x86-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=shadow-call-stack -### %s 2>&1 \
+// RUN:     --target=x86-unknown-linux -fuse-ld=ld \
 // RUN:   | FileCheck --check-prefix=CHECK-SHADOWCALLSTACK-LINUX-X86 %s
 // CHECK-SHADOWCALLSTACK-LINUX-X86: error: unsupported option '-fsanitize=shadow-call-stack' for target 'x86-unknown-linux'
 
-// RUN: %clang -fsanitize=shadow-call-stack %s -### -o %t.o 2>&1 \
-// RUN:     -fsanitize=safe-stack -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=shadow-call-stack -### %s 2>&1 \
+// RUN:     -fsanitize=safe-stack --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:   | FileCheck --check-prefix=CHECK-SHADOWCALLSTACK-SAFESTACK %s
 // CHECK-SHADOWCALLSTACK-SAFESTACK-NOT: error:
 
-// RUN: %clang -fsanitize=cfi -fsanitize-stats %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=cfi -fsanitize-stats -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-STATS-LINUX %s
@@ -703,88 +703,88 @@
 // CHECK-CFI-STATS-LINUX-NOT: "--whole-archive"
 // CHECK-CFI-STATS-LINUX: "{{[^"]*}}libclang_rt.stats-x86_64.a"
 
-// RUN: %clang -fsanitize=cfi -fsanitize-stats %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-apple-darwin -fuse-ld=ld \
+// RUN: %clang -fsanitize=cfi -fsanitize-stats -### %s 2>&1 \
+// RUN:     --target=x86_64-apple-darwin -fuse-ld=ld \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-STATS-DARWIN %s
 // CHECK-CFI-STATS-DARWIN: "{{.*}}ld{{(.exe)?}}"
 // CHECK-CFI-STATS-DARWIN: "{{[^"]*}}libclang_rt.stats_client_osx.a"
 // CHECK-CFI-STATS-DARWIN: "{{[^"]*}}libclang_rt.stats_osx_dynamic.dylib"
 
-// RUN: %clang -fsanitize=cfi -fsanitize-stats %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-pc-windows \
+// RUN: %clang -fsanitize=cfi -fsanitize-stats -### %s 2>&1 \
+// RUN:     --target=x86_64-pc-windows \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-STATS-WIN64 %s
 // CHECK-CFI-STATS-WIN64: "--dependent-lib=clang_rt.stats_client{{(-x86_64)?}}.lib"
 // CHECK-CFI-STATS-WIN64: "--dependent-lib=clang_rt.stats{{(-x86_64)?}}.lib"
 // CHECK-CFI-STATS-WIN64: "--linker-option=/include:__sanitizer_stats_register"
 
-// RUN: %clang -fsanitize=cfi -fsanitize-stats %s -### -o %t.o 2>&1 \
-// RUN:     -target i686-pc-windows \
+// RUN: %clang -fsanitize=cfi -fsanitize-stats -### %s 2>&1 \
+// RUN:     --target=i686-pc-windows \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-STATS-WIN32 %s
 // CHECK-CFI-STATS-WIN32: "--dependent-lib=clang_rt.stats_client{{(-i386)?}}.lib"
 // CHECK-CFI-STATS-WIN32: "--dependent-lib=clang_rt.stats{{(-i386)?}}.lib"
 // CHECK-CFI-STATS-WIN32: "--linker-option=/include:___sanitizer_stats_register"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target arm-linux-androideabi -fuse-ld=ld -fsanitize=safe-stack \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=arm-linux-androideabi -fuse-ld=ld -fsanitize=safe-stack \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-SAFESTACK-ANDROID-ARM %s
 //
 // CHECK-SAFESTACK-ANDROID-ARM: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-SAFESTACK-ANDROID-ARM-NOT: libclang_rt.safestack
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o -shared 2>&1 \
-// RUN:     -target arm-linux-androideabi -fuse-ld=ld -fsanitize=safe-stack \
+// RUN: %clang -### %s -shared 2>&1 \
+// RUN:     --target=arm-linux-androideabi -fuse-ld=ld -fsanitize=safe-stack \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-SAFESTACK-SHARED-ANDROID-ARM %s
 //
 // CHECK-SAFESTACK-SHARED-ANDROID-ARM: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-SAFESTACK-SHARED-ANDROID-ARM-NOT: libclang_rt.safestack
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target aarch64-linux-android -fuse-ld=ld -fsanitize=safe-stack \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=aarch64-linux-android -fuse-ld=ld -fsanitize=safe-stack \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-SAFESTACK-ANDROID-AARCH64 %s
 //
 // CHECK-SAFESTACK-ANDROID-AARCH64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-SAFESTACK-ANDROID-AARCH64-NOT: libclang_rt.safestack
 
-// RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-scei-ps4 -fuse-ld=ld \
+// RUN: %clang -fsanitize=undefined -### %s 2>&1 \
+// RUN:     --target=x86_64-scei-ps4 -fuse-ld=ld \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-PS4 %s
 // CHECK-UBSAN-PS4: --dependent-lib=libSceDbgUBSanitizer_stub_weak.a
 // CHECK-UBSAN-PS4: "{{.*}}ld{{(.gold)?(.exe)?}}"
 // CHECK-UBSAN-PS4: -lSceDbgUBSanitizer_stub_weak
 
-// RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-sie-ps5 -fuse-ld=ld \
+// RUN: %clang -fsanitize=undefined -### %s 2>&1 \
+// RUN:     --target=x86_64-sie-ps5 -fuse-ld=ld \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-PS5 %s
 // CHECK-UBSAN-PS5: --dependent-lib=libSceUBSanitizer_nosubmission_stub_weak.a
 // CHECK-UBSAN-PS5: "{{.*}}ld{{(.gold)?(.exe)?}}"
 // CHECK-UBSAN-PS5: -lSceUBSanitizer_nosubmission_stub_weak
 
-// RUN: %clang -fsanitize=address %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-scei-ps4 -fuse-ld=ld \
+// RUN: %clang -fsanitize=address -### %s 2>&1 \
+// RUN:     --target=x86_64-scei-ps4 -fuse-ld=ld \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-PS4 %s
 // CHECK-ASAN-PS4: --dependent-lib=libSceDbgAddressSanitizer_stub_weak.a
 // CHECK-ASAN-PS4: "{{.*}}ld{{(.gold)?(.exe)?}}"
 // CHECK-ASAN-PS4: -lSceDbgAddressSanitizer_stub_weak
 
-// RUN: %clang -fsanitize=address %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-sie-ps5 -fuse-ld=ld \
+// RUN: %clang -fsanitize=address -### %s 2>&1 \
+// RUN:     --target=x86_64-sie-ps5 -fuse-ld=ld \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-PS5 %s
 // CHECK-ASAN-PS5: --dependent-lib=libSceAddressSanitizer_nosubmission_stub_weak.a
 // CHECK-ASAN-PS5: "{{.*}}ld{{(.gold)?(.exe)?}}"
 // CHECK-ASAN-PS5: -lSceAddressSanitizer_nosubmission_stub_weak
 
-// RUN: %clang -fsanitize=address,undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-scei-ps4 -fuse-ld=ld \
+// RUN: %clang -fsanitize=address,undefined -### %s 2>&1 \
+// RUN:     --target=x86_64-scei-ps4 -fuse-ld=ld \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-AUBSAN-PS4 %s
 // CHECK-AUBSAN-PS4-NOT: --dependent-lib=libSceDbgUBSanitizer_stub_weak.a
@@ -793,8 +793,8 @@
 // CHECK-AUBSAN-PS4: "{{.*}}ld{{(.gold)?(.exe)?}}"
 // CHECK-AUBSAN-PS4: -lSceDbgAddressSanitizer_stub_weak
 
-// RUN: %clang -fsanitize=address,undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-sie-ps5 -fuse-ld=ld \
+// RUN: %clang -fsanitize=address,undefined -### %s 2>&1 \
+// RUN:     --target=x86_64-sie-ps5 -fuse-ld=ld \
 // RUN:     -shared \
 // RUN:   | FileCheck --check-prefix=CHECK-AUBSAN-PS5 %s
 // CHECK-AUBSAN-PS5-NOT: --dependent-lib=libSceUBSanitizer_nosubmission_stub_weak.a
@@ -803,22 +803,22 @@
 // CHECK-AUBSAN-PS5: "{{.*}}ld{{(.gold)?(.exe)?}}"
 // CHECK-AUBSAN-PS5: -lSceAddressSanitizer_nosubmission_stub_weak
 
-// RUN: %clang -fsanitize=address,undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-scei-ps4 -fuse-ld=ld \
+// RUN: %clang -fsanitize=address,undefined -### %s 2>&1 \
+// RUN:     --target=x86_64-scei-ps4 -fuse-ld=ld \
 // RUN:     -shared \
 // RUN:     -nostdlib \
 // RUN:   | FileCheck --check-prefix=CHECK-NOLIB-PS4 %s
 // CHECK-NOLIB-PS4-NOT: SceDbgAddressSanitizer_stub_weak
 
-// RUN: %clang -fsanitize=address,undefined %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-sie-ps5 -fuse-ld=ld \
+// RUN: %clang -fsanitize=address,undefined -### %s 2>&1 \
+// RUN:     --target=x86_64-sie-ps5 -fuse-ld=ld \
 // RUN:     -shared \
 // RUN:     -nostdlib \
 // RUN:   | FileCheck --check-prefix=CHECK-NOLIB-PS5 %s
 // CHECK-NOLIB-PS5-NOT: SceAddressSanitizer_nosubmission_stub_weak
 
-// RUN: %clang -fsanitize=scudo %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=scudo -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-SCUDO-LINUX %s
@@ -829,8 +829,8 @@
 // CHECK-SCUDO-LINUX: "-lpthread"
 // CHECK-SCUDO-LINUX: "-ldl"
 
-// RUN: %clang -fsanitize=scudo -fsanitize-minimal-runtime %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld \
+// RUN: %clang -fsanitize=scudo -fsanitize-minimal-runtime -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-SCUDO-MINIMAL-LINUX %s
@@ -839,8 +839,8 @@
 // CHECK-SCUDO-MINIMAL-LINUX: "--whole-archive" "{{.*}}libclang_rt.scudo_minimal-i386.a" "--no-whole-archive"
 // CHECK-SCUDO-MINIMAL-LINUX: "-lpthread"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.so -shared 2>&1 \
-// RUN:     -target i386-unknown-linux -fuse-ld=ld -fsanitize=scudo -shared-libsan \
+// RUN: %clang -### %s -o %t.so -shared 2>&1 \
+// RUN:     --target=i386-unknown-linux -fuse-ld=ld -fsanitize=scudo -shared-libsan \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-SCUDO-SHARED-LINUX %s
@@ -855,8 +855,8 @@
 // CHECK-SCUDO-SHARED-LINUX-NOT: "--export-dynamic"
 // CHECK-SCUDO-SHARED-LINUX-NOT: "--dynamic-list"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target arm-linux-androideabi -fuse-ld=ld -fsanitize=scudo \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=arm-linux-androideabi -fuse-ld=ld -fsanitize=scudo \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-SCUDO-ANDROID %s
 //
@@ -867,8 +867,8 @@
 // CHECK-SCUDO-ANDROID: libclang_rt.scudo-arm-android.so"
 // CHECK-SCUDO-ANDROID-NOT: "-lpthread"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target arm-linux-androideabi -fuse-ld=ld -fsanitize=scudo \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=arm-linux-androideabi -fuse-ld=ld -fsanitize=scudo \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:     -static-libsan \
 // RUN:   | FileCheck --check-prefix=CHECK-SCUDO-ANDROID-STATIC %s
@@ -879,8 +879,8 @@
 // CHECK-SCUDO-ANDROID-STATIC-NOT: "-lpthread"
 // CHECK-SCUDO-ANDROID-STATIC-NOT: "-lrt"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-HWASAN-X86-64-LINUX %s
@@ -896,8 +896,8 @@
 // CHECK-HWASAN-X86-64-LINUX: "-lrt"
 // CHECK-HWASAN-X86-64-LINUX: "-ldl"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
 // RUN:     -shared-libsan -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-SHARED-HWASAN-X86-64-LINUX %s
@@ -912,8 +912,8 @@
 // CHECK-SHARED-HWASAN-X86-64-LINUX-NOT: "--export-dynamic"
 // CHECK-SHARED-HWASAN-X86-64-LINUX-NOT: "--dynamic-list"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.so -shared 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
+// RUN: %clang -### %s -o %t.so -shared 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
 // RUN:     -shared-libsan -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-DSO-SHARED-HWASAN-X86-64-LINUX %s
@@ -928,8 +928,8 @@
 // CHECK-DSO-SHARED-HWASAN-X86-64-LINUX-NOT: "--export-dynamic"
 // CHECK-DSO-SHARED-HWASAN-X86-64-LINUX-NOT: "--dynamic-list"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target aarch64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=aarch64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-HWASAN-AARCH64-LINUX %s
@@ -945,8 +945,8 @@
 // CHECK-HWASAN-AARCH64-LINUX: "-lrt"
 // CHECK-HWASAN-AARCH64-LINUX: "-ldl"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target aarch64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=aarch64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
 // RUN:     -shared-libsan \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
@@ -962,8 +962,8 @@
 // CHECK-SHARED-HWASAN-AARCH64-LINUX-NOT: "--export-dynamic"
 // CHECK-SHARED-HWASAN-AARCH64-LINUX-NOT: "--dynamic-list"
 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.so -shared 2>&1 \
-// RUN:     -target aarch64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
+// RUN: %clang -### %s -o %t.so -shared 2>&1 \
+// RUN:     --target=aarch64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
 // RUN:     -shared-libsan -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-DSO-SHARED-HWASAN-AARCH64-LINUX %s
diff --git a/clang/test/Driver/solaris-ld-values.c b/clang/test/Driver/solaris-ld-values.c
index 95601da..d7b6734 100644
--- a/clang/test/Driver/solaris-ld-values.c
+++ b/clang/test/Driver/solaris-ld-values.c
@@ -3,7 +3,7 @@
 // independent of the host system.
 
 // Check sparc-sun-solaris2.11, 32bit
-// RUN: %clang -no-canonical-prefixes -ansi %s -### -o %t.o 2>&1 \
+// RUN: %clang -ansi -### %s 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
@@ -11,7 +11,7 @@
 // CHECK-LD-SPARC32-ANSI: values-Xc.o
 // CHECK-LD-SPARC32-ANSI: values-xpg6.o
 
-// RUN: %clang -no-canonical-prefixes -std=c89 %s -### -o %t.o 2>&1 \
+// RUN: %clang -std=c89 -### %s 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
@@ -19,7 +19,7 @@
 // CHECK-LD-SPARC32-C89: values-Xc.o
 // CHECK-LD-SPARC32-C89: values-xpg4.o
 
-// RUN: %clang -no-canonical-prefixes -std=c90 %s -### -o %t.o 2>&1 \
+// RUN: %clang -std=c90 -### %s 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
@@ -27,7 +27,7 @@
 // CHECK-LD-SPARC32-C90: values-Xc.o
 // CHECK-LD-SPARC32-C90: values-xpg4.o
 
-// RUN: %clang -no-canonical-prefixes -std=iso9899:199409 %s -### -o %t.o 2>&1 \
+// RUN: %clang -std=iso9899:199409 -### %s 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
@@ -35,7 +35,7 @@
 // CHECK-LD-SPARC32-C94: values-Xc.o
 // CHECK-LD-SPARC32-C94: values-xpg4.o
 
-// RUN: %clang -no-canonical-prefixes -std=c11 %s -### -o %t.o 2>&1 \
+// RUN: %clang -std=c11 -### %s 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
@@ -43,7 +43,7 @@
 // CHECK-LD-SPARC32-C11: values-Xc.o
 // CHECK-LD-SPARC32-C11: values-xpg6.o
 
-// RUN: %clang -no-canonical-prefixes -std=gnu89 %s -### -o %t.o 2>&1 \
+// RUN: %clang -std=gnu89 -### %s 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
@@ -51,7 +51,7 @@
 // CHECK-LD-SPARC32-GNU89: values-Xa.o
 // CHECK-LD-SPARC32-GNU89: values-xpg4.o
 
-// RUN: %clang -no-canonical-prefixes -std=gnu90 %s -### -o %t.o 2>&1 \
+// RUN: %clang -std=gnu90 -### %s 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
@@ -59,7 +59,7 @@
 // CHECK-LD-SPARC32-GNU90: values-Xa.o
 // CHECK-LD-SPARC32-GNU90: values-xpg4.o
 
-// RUN: %clang -no-canonical-prefixes -std=gnu11 %s -### -o %t.o 2>&1 \
+// RUN: %clang -std=gnu11 -### %s 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
@@ -68,7 +68,7 @@
 // CHECK-LD-SPARC32-GNU11: values-xpg6.o
 
 // Check i386-pc-solaris2.11, 32bit
-// RUN: %clang -no-canonical-prefixes -ansi %s -### -o %t.o 2>&1 \
+// RUN: %clang -ansi -### %s 2>&1 \
 // RUN:     --target=i386-pc-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_x86_tree \
diff --git a/clang/test/Driver/solaris-ld-values.cpp b/clang/test/Driver/solaris-ld-values.cpp
index a190f12..4230fab 100644
--- a/clang/test/Driver/solaris-ld-values.cpp
+++ b/clang/test/Driver/solaris-ld-values.cpp
@@ -3,7 +3,7 @@
 // independent of the host system.
 
 // Check sparc-sun-solaris2.11, 32bit
-// RUN: %clang -no-canonical-prefixes -ansi %s -### -o %t.o 2>&1 \
+// RUN: %clang -ansi -### %s 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
@@ -11,7 +11,7 @@
 // CHECK-LD-SPARC32-ANSI: values-Xc.o
 // CHECK-LD-SPARC32-ANSI: values-xpg6.o
 
-// RUN: %clang -no-canonical-prefixes -std=c++98 %s -### -o %t.o 2>&1 \
+// RUN: %clang -std=c++98 -### %s 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
@@ -19,7 +19,7 @@
 // CHECK-LD-SPARC32-CPP98: values-Xc.o
 // CHECK-LD-SPARC32-CPP98: values-xpg6.o
 
-// RUN: %clang -no-canonical-prefixes -std=c++11 %s -### -o %t.o 2>&1 \
+// RUN: %clang -std=c++11 -### %s 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
@@ -27,7 +27,7 @@
 // CHECK-LD-SPARC32-CPP11: values-Xc.o
 // CHECK-LD-SPARC32-CPP11: values-xpg6.o
 
-// RUN: %clang -no-canonical-prefixes -std=gnu++98 %s -### -o %t.o 2>&1 \
+// RUN: %clang -std=gnu++98 -### %s 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
@@ -36,7 +36,7 @@
 // CHECK-LD-SPARC32-GNUPP98: values-xpg6.o
 
 // Check i386-pc-solaris2.11, 32bit
-// RUN: %clang -no-canonical-prefixes -ANSI %s -### -o %t.o 2>&1 \
+// RUN: %clang -ANSI -### %s 2>&1 \
 // RUN:     --target=i386-pc-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/solaris_x86_tree \
diff --git a/clang/test/Driver/sparc-as.c b/clang/test/Driver/sparc-as.c
index 8405cfd..dca78f5 100644
--- a/clang/test/Driver/sparc-as.c
+++ b/clang/test/Driver/sparc-as.c
@@ -1,171 +1,171 @@
 // Make sure Sparc does use the integrated assembler by default.
 
-// RUN: %clang -target sparc-linux -### -c %s 2>&1 \
+// RUN: %clang --target=sparc-linux -### -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix=IAS %s
 
-// RUN: %clang -target sparc-linux -fintegrated-as -### -c %s 2>&1 \
+// RUN: %clang --target=sparc-linux -fintegrated-as -### -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix=IAS %s
 
-// RUN: %clang -target sparc-linux -fno-integrated-as -### -c %s 2>&1 \
+// RUN: %clang --target=sparc-linux -fno-integrated-as -### -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix=NO-IAS %s
 
 // IAS-NOT: "-no-integrated-as"
 // NO-IAS: "-no-integrated-as"
 
-// RUN: %clang -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC %s
 
-// RUN: %clang -mcpu=v8 -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=v8 --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: %clang -mcpu=supersparc -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=supersparc --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: %clang -mcpu=sparclite -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=sparclite --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-SPARCLITE %s
 
-// RUN: %clang -mcpu=f934 -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=f934 --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-SPARCLITE %s
 
-// RUN: %clang -mcpu=hypersparc -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=hypersparc --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: %clang -mcpu=sparclite86x -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=sparclite86x --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-SPARCLITE %s
 
-// RUN: %clang -mcpu=sparclet -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=sparclet --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-SPARCLET %s
 
-// RUN: %clang -mcpu=tsc701 -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=tsc701 --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-SPARCLET %s
 
-// RUN: %clang -mcpu=v9 -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=v9 --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8PLUS %s
 
-// RUN: %clang -mcpu=ultrasparc -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ultrasparc --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8PLUS %s
 
-// RUN: %clang -mcpu=ultrasparc3 -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ultrasparc3 --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8PLUS %s
 
-// RUN: %clang -mcpu=niagara -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=niagara --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8PLUSB %s
 
-// RUN: %clang -mcpu=niagara2 -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=niagara2 --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8PLUSB %s
 
-// RUN: %clang -mcpu=niagara3 -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=niagara3 --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8PLUSD %s
 
-// RUN: %clang -mcpu=niagara4 -no-canonical-prefixes -target sparc--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=niagara4 --target=sparc--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8PLUSD %s
 
-// RUN: %clang -mcpu=ma2100 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ma2100 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=ma2150 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ma2150 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=ma2155 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ma2155 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=ma2450 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ma2450 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=ma2455 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ma2455 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=ma2x5x -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ma2x5x --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=ma2080 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ma2080 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=ma2085 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ma2085 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=ma2480 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ma2480 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=ma2485 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ma2485 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=ma2x8x -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ma2x8x --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=myriad2 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=myriad2 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=myriad2.1 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=myriad2.1 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=myriad2.2 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=myriad2.2 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=myriad2.3 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=myriad2.3 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=leon2 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=leon2 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: %clang -mcpu=at697e -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=at697e --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: %clang -mcpu=at697f -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=at697f --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: %clang -mcpu=leon3 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=leon3 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=ut699 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ut699 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: %clang -mcpu=gr712rc -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=gr712rc --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=leon4 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=leon4 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
-// RUN: %clang -mcpu=gr740 -no-canonical-prefixes -target sparc \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=gr740 --target=sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-LEON %s
 
 // SPARC: as{{.*}}" "-32" "-Av8" "-o"
diff --git a/clang/test/Driver/sparcv9-as.c b/clang/test/Driver/sparcv9-as.c
index 5ce9abe..4424b56 100644
--- a/clang/test/Driver/sparcv9-as.c
+++ b/clang/test/Driver/sparcv9-as.c
@@ -1,47 +1,47 @@
 // Make sure SparcV9 does use the integrated assembler by default.
 
-// RUN: %clang -target sparcv9-linux -### -c %s 2>&1 \
+// RUN: %clang --target=sparcv9-linux -### -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix=IAS %s
 
-// RUN: %clang -target sparcv9-linux -fintegrated-as -### -c %s 2>&1 \
+// RUN: %clang --target=sparcv9-linux -fintegrated-as -### -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix=IAS %s
 
-// RUN: %clang -target sparcv9-linux -fno-integrated-as -### -c %s 2>&1 \
+// RUN: %clang --target=sparcv9-linux -fno-integrated-as -### -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix=NO-IAS %s
 
 // IAS-NOT: "-no-integrated-as"
 // NO-IAS: "-no-integrated-as"
 
-// RUN: %clang -no-canonical-prefixes -target sparcv9--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang --target=sparcv9--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC %s
 
-// RUN: %clang -mcpu=v9 -no-canonical-prefixes -target sparcv9--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=v9 --target=sparcv9--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V9 %s
 
-// RUN: %clang -mcpu=ultrasparc -no-canonical-prefixes -target sparcv9--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ultrasparc --target=sparcv9--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V9 %s
 
-// RUN: %clang -mcpu=ultrasparc3 -no-canonical-prefixes -target sparcv9--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=ultrasparc3 --target=sparcv9--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V9 %s
 
-// RUN: %clang -mcpu=niagara -no-canonical-prefixes -target sparcv9--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=niagara --target=sparcv9--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V9B %s
 
-// RUN: %clang -mcpu=niagara2 -no-canonical-prefixes -target sparcv9--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=niagara2 --target=sparcv9--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V9B %s
 
-// RUN: %clang -mcpu=niagara3 -no-canonical-prefixes -target sparcv9--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=niagara3 --target=sparcv9--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V9D %s
 
-// RUN: %clang -mcpu=niagara4 -no-canonical-prefixes -target sparcv9--netbsd \
-// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: %clang -mcpu=niagara4 --target=sparcv9--netbsd \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree -### %s 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V9D %s
 
 // SPARC: as{{.*}}" "-64" "-Av9" "-o"
@@ -49,35 +49,35 @@
 // SPARC-V9B: as{{.*}}" "-64" "-Av9b" "-o"
 // SPARC-V9D: as{{.*}}" "-64" "-Av9d" "-o"
 
-// RUN: not %clang -mcpu=v8 -no-canonical-prefixes -target sparcv9--netbsd \
+// RUN: not %clang -mcpu=v8 --target=sparcv9--netbsd \
 // RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -c 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: not %clang -mcpu=supersparc -no-canonical-prefixes -target sparcv9--netbsd \
+// RUN: not %clang -mcpu=supersparc --target=sparcv9--netbsd \
 // RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -c 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: not %clang -mcpu=sparclite -no-canonical-prefixes -target sparcv9--netbsd \
+// RUN: not %clang -mcpu=sparclite --target=sparcv9--netbsd \
 // RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -c 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: not %clang -mcpu=f934 -no-canonical-prefixes -target sparcv9--netbsd \
+// RUN: not %clang -mcpu=f934 --target=sparcv9--netbsd \
 // RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -c 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: not %clang -mcpu=hypersparc -no-canonical-prefixes -target sparcv9--netbsd \
+// RUN: not %clang -mcpu=hypersparc --target=sparcv9--netbsd \
 // RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -c 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: not %clang -mcpu=sparclite86x -no-canonical-prefixes -target sparcv9--netbsd \
+// RUN: not %clang -mcpu=sparclite86x --target=sparcv9--netbsd \
 // RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -c 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: not %clang -mcpu=sparclet -no-canonical-prefixes -target sparcv9--netbsd \
+// RUN: not %clang -mcpu=sparclet --target=sparcv9--netbsd \
 // RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -c 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
-// RUN: not %clang -mcpu=tsc701 -no-canonical-prefixes -target sparcv9--netbsd \
+// RUN: not %clang -mcpu=tsc701 --target=sparcv9--netbsd \
 // RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -c 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8 %s
 
diff --git a/clang/test/Driver/split-stack-ld.c b/clang/test/Driver/split-stack-ld.c
index 3441d54..f01b139 100644
--- a/clang/test/Driver/split-stack-ld.c
+++ b/clang/test/Driver/split-stack-ld.c
@@ -1,15 +1,15 @@
 // Test split stack ld flags.
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux -fsplit-stack \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=i386-unknown-linux -fsplit-stack \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LINUX-I386 %s
 //
 // CHECK-LINUX-I386: "--wrap=pthread_create"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     -target x86_64-unknown-linux -fsplit-stack \
+// RUN: %clang -### %s 2>&1 \
+// RUN:     --target=x86_64-unknown-linux -fsplit-stack \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-LINUX-X86-64 %s
diff --git a/clang/test/Driver/target-override.c b/clang/test/Driver/target-override.c
index ddda8aa..aef89cc 100644
--- a/clang/test/Driver/target-override.c
+++ b/clang/test/Driver/target-override.c
@@ -6,10 +6,10 @@
 
 // Check if invocation of "foo-clang" adds option "-target foo".
 //
-// RUN: %t/i386-clang -c -no-canonical-prefixes %s -### 2>&1 | FileCheck -check-prefix CHECK-TG1 %s
+// RUN: %t/i386-clang -c -### %s 2>&1 | FileCheck -check-prefix CHECK-TG1 %s
 // CHECK-TG1: Target: i386
 
-// Check if invocation of "foo-clang -target bar" overrides option "-target foo".
+// Check if invocation of "foo-clang --target=bar" overrides option "-target foo".
 //
-// RUN: %t/i386-clang -c -no-canonical-prefixes -target x86_64 %s -### 2>&1 | FileCheck -check-prefix CHECK-TG2 %s
+// RUN: %t/i386-clang -c --target=x86_64 -### %s 2>&1 | FileCheck -check-prefix CHECK-TG2 %s
 // CHECK-TG2: Target: x86_64
diff --git a/clang/test/Driver/target.c b/clang/test/Driver/target.c
index a46ba16..c776277 100644
--- a/clang/test/Driver/target.c
+++ b/clang/test/Driver/target.c
@@ -1,9 +1,9 @@
-// RUN: %clang -no-canonical-prefixes --target=unknown-unknown-unknown -c %s \
-// RUN:   -o %t.o -### 2>&1 | FileCheck %s
+// RUN: %clang --target=unknown-unknown-unknown -c %s \
+// RUN:   -### 2>&1 | FileCheck %s
 //
 // Ensure we get a crazy triple here as we asked for one.
 // CHECK: Target: unknown-unknown-unknown
 //
 // Also check that the legacy spelling works.
-// RUN: %clang -no-canonical-prefixes -target unknown-unknown-unknown -c %s \
-// RUN:   -o %t.o -### 2>&1 | FileCheck %s
+// RUN: %clang --target=unknown-unknown-unknown -c %s \
+// RUN:   -### 2>&1 | FileCheck %s
diff --git a/clang/test/Driver/ve-toolchain.c b/clang/test/Driver/ve-toolchain.c
index 753dee19..32e2576 100644
--- a/clang/test/Driver/ve-toolchain.c
+++ b/clang/test/Driver/ve-toolchain.c
@@ -5,45 +5,45 @@
 ///-----------------------------------------------------------------------------
 /// Checking dwarf-version
 
-// RUN: %clang -### -g -target ve %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
+// RUN: %clang -### -g --target=ve %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
 // DWARF_VER: "-dwarf-version=5"
 
 ///-----------------------------------------------------------------------------
 /// Checking include-path
 
-// RUN: %clang -### -target ve --sysroot %S/Inputs/basic_ve_tree %s \
+// RUN: %clang -### --target=ve --sysroot %S/Inputs/basic_ve_tree %s \
 // RUN:     -resource-dir=%S/Inputs/basic_ve_tree/resource_dir \
 // RUN:     2>&1 | FileCheck -check-prefix=DEFINC %s
-// DEFINC: clang{{.*}} "-cc1"
+// DEFINC: "-cc1"
 // DEFINC-SAME: "-nostdsysteminc"
 // DEFINC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // DEFINC-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
 // DEFINC-SAME: "-internal-isystem" "[[RESOURCE_DIR]]/include"
 // DEFINC-SAME: "-internal-isystem" "[[SYSROOT]]/opt/nec/ve/include"
 
-// RUN: %clang -### -target ve --sysroot %S/Inputs/basic_ve_tree %s \
+// RUN: %clang -### --target=ve --sysroot %S/Inputs/basic_ve_tree %s \
 // RUN:     -resource-dir=%S/Inputs/basic_ve_tree/resource_dir \
 // RUN:     -nostdlibinc 2>&1 | FileCheck -check-prefix=NOSTDLIBINC %s
-// NOSTDLIBINC: clang{{.*}} "-cc1"
+// NOSTDLIBINC: "-cc1"
 // NOSTDLIBINC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // NOSTDLIBINC-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
 // NOSTDLIBINC-SAME: "-internal-isystem" "[[RESOURCE_DIR]]/include"
 // NOSTDLIBINC-NOT: "-internal-isystem" "[[SYSROOT]]/opt/nec/ve/include"
 
-// RUN: %clang -### -target ve --sysroot %S/Inputs/basic_ve_tree %s \
+// RUN: %clang -### --target=ve --sysroot %S/Inputs/basic_ve_tree %s \
 // RUN:     -resource-dir=%S/Inputs/basic_ve_tree/resource_dir \
 // RUN:     -nobuiltininc 2>&1 | FileCheck -check-prefix=NOBUILTININC %s
-// NOBUILTININC: clang{{.*}} "-cc1"
+// NOBUILTININC: "-cc1"
 // NOBUILTININC-SAME: "-nobuiltininc"
 // NOBUILTININC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // NOBUILTININC-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
 // NOBUILTININC-NOT: "-internal-isystem" "[[RESOURCE_DIR]]/include"
 // NOBUILTININC-SAME: "-internal-isystem" "[[SYSROOT]]/opt/nec/ve/include"
 
-// RUN: %clang -### -target ve --sysroot %S/Inputs/basic_ve_tree %s \
+// RUN: %clang -### --target=ve --sysroot %S/Inputs/basic_ve_tree %s \
 // RUN:     -resource-dir=%S/Inputs/basic_ve_tree/resource_dir \
 // RUN:     -nostdinc 2>&1 | FileCheck -check-prefix=NOSTDINC %s
-// NOSTDINC: clang{{.*}} "-cc1"
+// NOSTDINC: "-cc1"
 // NOSTDINC-SAME: "-nobuiltininc"
 // NOSTDINC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // NOSTDINC-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
@@ -53,21 +53,21 @@
 ///-----------------------------------------------------------------------------
 /// Checking -faddrsig
 
-// RUN: %clang -### -target ve %s 2>&1 | FileCheck -check-prefix=DEFADDRSIG %s
-// DEFADDRSIG: clang{{.*}} "-cc1"
+// RUN: %clang -### --target=ve %s 2>&1 | FileCheck -check-prefix=DEFADDRSIG %s
+// DEFADDRSIG: "-cc1"
 // DEFADDRSIG-NOT: "-faddrsig"
 
 ///-----------------------------------------------------------------------------
 /// Checking -fintegrated-as
 
-// RUN: %clang -### -target ve \
+// RUN: %clang -### --target=ve \
 // RUN:    -x assembler -fuse-ld=ld %s 2>&1 | \
 // RUN:    FileCheck -check-prefix=AS %s
-// RUN: %clang -### -target ve \
+// RUN: %clang -### --target=ve \
 // RUN:    -fno-integrated-as -fuse-ld=ld -x assembler %s 2>&1 | \
 // RUN:    FileCheck -check-prefix=NAS %s
 
-// AS: clang{{.*}} "-cc1as"
+// AS: "-cc1as"
 // AS: nld{{.*}}
 
 // NAS: nas{{.*}}
@@ -80,7 +80,7 @@
 ///  - nld VE specific options
 ///  - sjlj exception
 
-// RUN: %clang -### -target ve-unknown-linux-gnu \
+// RUN: %clang -### --target=ve-unknown-linux-gnu \
 // RUN:     --sysroot %S/Inputs/basic_ve_tree \
 // RUN:     -resource-dir=%S/Inputs/basic_ve_tree/resource_dir \
 // RUN:     --unwindlib=none \
diff --git a/clang/test/Driver/ve-toolchain.cpp b/clang/test/Driver/ve-toolchain.cpp
index 4b2b9c5..5a33d5e 100644
--- a/clang/test/Driver/ve-toolchain.cpp
+++ b/clang/test/Driver/ve-toolchain.cpp
@@ -5,19 +5,19 @@
 ///-----------------------------------------------------------------------------
 /// Checking dwarf-version
 
-// RUN: %clangxx -### -g -target ve-unknown-linux-gnu \
+// RUN: %clangxx -### -g --target=ve-unknown-linux-gnu \
 // RUN:     %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
 // DWARF_VER: "-dwarf-version=5"
 
 ///-----------------------------------------------------------------------------
 /// Checking include-path
 
-// RUN: %clangxx -### -target ve-unknown-linux-gnu \
+// RUN: %clangxx -### --target=ve-unknown-linux-gnu \
 // RUN:     --sysroot %S/Inputs/basic_ve_tree %s \
 // RUN:     -ccc-install-dir %S/Inputs/basic_ve_tree/bin \
 // RUN:     -resource-dir=%S/Inputs/basic_ve_tree/resource_dir \
 // RUN:     2>&1 | FileCheck -check-prefix=DEFINC %s
-// DEFINC: clang{{.*}} "-cc1"
+// DEFINC: "-cc1"
 // DEFINC-SAME: "-nostdsysteminc"
 // DEFINC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // DEFINC-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
@@ -26,12 +26,12 @@
 // DEFINC-SAME: "-internal-isystem" "[[RESOURCE_DIR]]/include"
 // DEFINC-SAME: "-internal-isystem" "[[SYSROOT]]/opt/nec/ve/include"
 
-// RUN: %clangxx -### -target ve-unknown-linux-gnu \
+// RUN: %clangxx -### --target=ve-unknown-linux-gnu \
 // RUN:     --sysroot %S/Inputs/basic_ve_tree %s \
 // RUN:     -ccc-install-dir %S/Inputs/basic_ve_tree/bin \
 // RUN:     -resource-dir=%S/Inputs/basic_ve_tree/resource_dir \
 // RUN:     -nostdlibinc 2>&1 | FileCheck -check-prefix=NOSTDLIBINC %s
-// NOSTDLIBINC: clang{{.*}} "-cc1"
+// NOSTDLIBINC: "-cc1"
 // NOSTDLIBINC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // NOSTDLIBINC-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
 // NOSTDLIBINC-NOT: "-internal-isystem" "{{.*}}/bin/../include/ve-unknown-linux-gnu/c++/v1"
@@ -39,12 +39,12 @@
 // NOSTDLIBINC-SAME: "-internal-isystem" "[[RESOURCE_DIR]]/include"
 // NOSTDLIBINC-NOT: "-internal-isystem" "[[SYSROOT]]/opt/nec/ve/include"
 
-// RUN: %clangxx -### -target ve-unknown-linux-gnu \
+// RUN: %clangxx -### --target=ve-unknown-linux-gnu \
 // RUN:     --sysroot %S/Inputs/basic_ve_tree %s \
 // RUN:     -ccc-install-dir %S/Inputs/basic_ve_tree/bin \
 // RUN:     -resource-dir=%S/Inputs/basic_ve_tree/resource_dir \
 // RUN:     -nobuiltininc 2>&1 | FileCheck -check-prefix=NOBUILTININC %s
-// NOBUILTININC: clang{{.*}} "-cc1"
+// NOBUILTININC: "-cc1"
 // NOBUILTININC-SAME: "-nobuiltininc"
 // NOBUILTININC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // NOBUILTININC-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
@@ -53,12 +53,12 @@
 // NOBUILTININC-NOT: "-internal-isystem" "[[RESOURCE_DIR]]/include"
 // NOBUILTININC-SAME: "-internal-isystem" "[[SYSROOT]]/opt/nec/ve/include"
 
-// RUN: %clangxx -### -target ve-unknown-linux-gnu \
+// RUN: %clangxx -### --target=ve-unknown-linux-gnu \
 // RUN:     --sysroot %S/Inputs/basic_ve_tree %s \
 // RUN:     -ccc-install-dir %S/Inputs/basic_ve_tree/bin \
 // RUN:     -resource-dir=%S/Inputs/basic_ve_tree/resource_dir \
 // RUN:     -nostdinc 2>&1 | FileCheck -check-prefix=NOSTDINC %s
-// NOSTDINC: clang{{.*}} "-cc1"
+// NOSTDINC: "-cc1"
 // NOSTDINC-SAME: "-nobuiltininc"
 // NOSTDINC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // NOSTDINC-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
@@ -67,12 +67,12 @@
 // NOSTDINC-NOT: "-internal-isystem" "[[RESOURCE_DIR]]/include"
 // NOSTDINC-NOT: "-internal-isystem" "[[SYSROOT]]/opt/nec/ve/include"
 
-// RUN: %clangxx -### -target ve-unknown-linux-gnu \
+// RUN: %clangxx -### --target=ve-unknown-linux-gnu \
 // RUN:     --sysroot %S/Inputs/basic_ve_tree %s \
 // RUN:     -ccc-install-dir %S/Inputs/basic_ve_tree/bin \
 // RUN:     -resource-dir=%S/Inputs/basic_ve_tree/resource_dir \
 // RUN:     -nostdinc++ 2>&1 | FileCheck -check-prefix=NOSTDINCXX %s
-// NOSTDINCXX: clang{{.*}} "-cc1"
+// NOSTDINCXX: "-cc1"
 // NOSTDINCXX-SAME: "-nostdinc++"
 // NOSTDINCXX-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // NOSTDINCXX-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
@@ -85,12 +85,12 @@
 /// Checking environment variable NCC_CPLUS_INCLUDE_PATH
 
 // RUN: env NCC_CPLUS_INCLUDE_PATH=/test/test %clangxx -### \
-// RUN:     -target ve-unknown-linux-gnu %s \
+// RUN:     --target=ve-unknown-linux-gnu %s \
 // RUN:     --sysroot %S/Inputs/basic_ve_tree \
 // RUN:     -resource-dir=%S/Inputs/basic_ve_tree/resource_dir \
 // RUN:     2>&1 | FileCheck -check-prefix=DEFINCENV %s
 
-// DEFINCENV: clang{{.*}} "-cc1"
+// DEFINCENV: "-cc1"
 // DEFINCENV-SAME: "-nostdsysteminc"
 // DEFINCENV-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // DEFINCENV-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
@@ -101,22 +101,22 @@
 ///-----------------------------------------------------------------------------
 /// Checking -faddrsig
 
-// RUN: %clangxx -### -target ve-unknown-linux-gnu \
+// RUN: %clangxx -### --target=ve-unknown-linux-gnu \
 // RUN:     %s 2>&1 | FileCheck -check-prefix=DEFADDRSIG %s
-// DEFADDRSIG: clang{{.*}} "-cc1"
+// DEFADDRSIG: "-cc1"
 // DEFADDRSIG-NOT: "-faddrsig"
 
 ///-----------------------------------------------------------------------------
 /// Checking -fintegrated-as
 
-// RUN: %clangxx -### -target ve-unknown-linux-gnu \
+// RUN: %clangxx -### --target=ve-unknown-linux-gnu \
 // RUN:     -x assembler -fuse-ld=ld %s 2>&1 | \
 // RUN:    FileCheck -check-prefix=AS %s
-// RUN: %clangxx -### -target ve-unknown-linux-gnu \
+// RUN: %clangxx -### --target=ve-unknown-linux-gnu \
 // RUN:     -fno-integrated-as -x assembler -fuse-ld=ld %s 2>&1 | \
 // RUN:    FileCheck -check-prefix=NAS %s
 
-// AS: clang{{.*}} "-cc1as"
+// AS: "-cc1as"
 // AS: nld{{.*}}
 
 // NAS: nas{{.*}}
@@ -129,14 +129,14 @@
 ///  - nld VE specific options
 ///  - sjlj exception
 
-// RUN: %clangxx -### -target ve-unknown-linux-gnu \
+// RUN: %clangxx -### --target=ve-unknown-linux-gnu \
 // RUN:     --sysroot %S/Inputs/basic_ve_tree \
 // RUN:     -fuse-ld=ld \
 // RUN:     -resource-dir=%S/Inputs/basic_ve_tree/resource_dir \
 // RUN:     --unwindlib=none \
 // RUN:     --stdlib=libc++ %s 2>&1 | FileCheck -check-prefix=DEF %s
 
-// DEF:      clang{{.*}}" "-cc1"
+// DEF:      "-cc1"
 // DEF-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // DEF-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
 // DEF-SAME: "-exception-model=sjlj"
diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c
index c7eeec9..7e06044a 100644
--- a/clang/test/Driver/wasm-toolchain.c
+++ b/clang/test/Driver/wasm-toolchain.c
@@ -1,113 +1,113 @@
 // A basic clang -cc1 command-line. WebAssembly is somewhat special in
 // enabling -fvisibility=hidden by default.
 
-// RUN: %clang %s -### -no-canonical-prefixes -target wasm32-unknown-unknown 2>&1 \
+// RUN: %clang -### %s --target=wasm32-unknown-unknown 2>&1 \
 // RUN:   | FileCheck -check-prefix=CC1 %s
-// CC1: clang{{.*}} "-cc1" "-triple" "wasm32-unknown-unknown" {{.*}} "-fvisibility" "hidden" {{.*}}
+// CC1: "-cc1" "-triple" "wasm32-unknown-unknown" {{.*}} "-fvisibility" "hidden" {{.*}}
 
 // Ditto, but ensure that a user -fvisibility=default disables the default
 // -fvisibility=hidden.
 
-// RUN: %clang %s -### -target wasm32-unknown-unknown -fvisibility=default 2>&1 \
+// RUN: %clang -### %s --target=wasm32-unknown-unknown -fvisibility=default 2>&1 \
 // RUN:   | FileCheck -check-prefix=FVISIBILITY_DEFAULT %s
 // FVISIBILITY_DEFAULT-NOT: hidden
 
 // A basic C link command-line with unknown OS.
 
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s 2>&1 \
+// RUN: %clang -### --target=wasm32-unknown-unknown --sysroot=/foo %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK %s
-// LINK: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C link command-line with optimization with unknown OS.
 
-// RUN: %clang -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s 2>&1 \
+// RUN: %clang -### -O2 --target=wasm32-unknown-unknown --sysroot=/foo %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_OPT %s
-// LINK_OPT: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK_OPT: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_OPT: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C link command-line with known OS.
 
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo %s 2>&1 \
+// RUN: %clang -### --target=wasm32-wasi --sysroot=/foo %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_KNOWN %s
-// LINK_KNOWN: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK_KNOWN: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C link command-line with optimization with known OS.
 
-// RUN: %clang -### -O2 -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo %s 2>&1 \
+// RUN: %clang -### -O2 --target=wasm32-wasi --sysroot=/foo %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_OPT_KNOWN %s
-// LINK_OPT_KNOWN: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK_OPT_KNOWN: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_OPT_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C compile command-line with known OS.
 
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo %s 2>&1 \
+// RUN: %clang -### --target=wasm32-wasi --sysroot=/foo %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=COMPILE %s
-// COMPILE: clang{{.*}}" "-cc1" {{.*}} "-internal-isystem" "/foo/include/wasm32-wasi" "-internal-isystem" "/foo/include"
+// COMPILE: "-cc1" {{.*}} "-internal-isystem" "/foo/include/wasm32-wasi" "-internal-isystem" "/foo/include"
 
 // Thread-related command line tests.
 
 // '-pthread' sets +atomics, +bulk-memory, +mutable-globals, +sign-ext, and --shared-memory
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:    --sysroot=/foo %s -fuse-ld=wasm-ld -pthread 2>&1 \
 // RUN:  | FileCheck -check-prefix=PTHREAD %s
-// PTHREAD: clang{{.*}}" "-cc1" {{.*}} "-target-feature" "+atomics" "-target-feature" "+bulk-memory" "-target-feature" "+mutable-globals" "-target-feature" "+sign-ext"
+// PTHREAD: "-cc1" {{.*}} "-target-feature" "+atomics" "-target-feature" "+bulk-memory" "-target-feature" "+mutable-globals" "-target-feature" "+sign-ext"
 // PTHREAD: wasm-ld{{.*}}" "-lpthread" "--shared-memory"
 
 // '-pthread' not allowed with '-mno-atomics'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:     --sysroot=/foo %s -pthread -mno-atomics 2>&1 \
 // RUN:   | FileCheck -check-prefix=PTHREAD_NO_ATOMICS %s
 // PTHREAD_NO_ATOMICS: invalid argument '-pthread' not allowed with '-mno-atomics'
 
 // '-pthread' not allowed with '-mno-bulk-memory'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:     --sysroot=/foo %s -pthread -mno-bulk-memory 2>&1 \
 // RUN:   | FileCheck -check-prefix=PTHREAD_NO_BULK_MEM %s
 // PTHREAD_NO_BULK_MEM: invalid argument '-pthread' not allowed with '-mno-bulk-memory'
 
 // '-pthread' not allowed with '-mno-mutable-globals'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:     --sysroot=/foo %s -pthread -mno-mutable-globals 2>&1 \
 // RUN:   | FileCheck -check-prefix=PTHREAD_NO_MUT_GLOBALS %s
 // PTHREAD_NO_MUT_GLOBALS: invalid argument '-pthread' not allowed with '-mno-mutable-globals'
 
 // '-pthread' not allowed with '-mno-sign-ext'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:     --sysroot=/foo %s -pthread -mno-sign-ext 2>&1 \
 // RUN:   | FileCheck -check-prefix=PTHREAD_NO_SIGN_EXT %s
 // PTHREAD_NO_SIGN_EXT: invalid argument '-pthread' not allowed with '-mno-sign-ext'
 
 // '-mllvm -emscripten-cxx-exceptions-allowed=foo,bar' sets
 // '-mllvm --force-attribute=foo:noinline -mllvm --force-attribute=bar:noinline'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:    --sysroot=/foo %s -mllvm -enable-emscripten-cxx-exceptions \
 // RUN:    -mllvm -emscripten-cxx-exceptions-allowed=foo,bar 2>&1 \
 // RUN:  | FileCheck -check-prefix=EMSCRIPTEN_EH_ALLOWED_NOINLINE %s
-// EMSCRIPTEN_EH_ALLOWED_NOINLINE: clang{{.*}}" "-cc1" {{.*}} "-mllvm" "--force-attribute=foo:noinline" "-mllvm" "--force-attribute=bar:noinline"
+// EMSCRIPTEN_EH_ALLOWED_NOINLINE: "-cc1" {{.*}} "-mllvm" "--force-attribute=foo:noinline" "-mllvm" "--force-attribute=bar:noinline"
 
 // '-mllvm -emscripten-cxx-exceptions-allowed' only allowed with
 // '-mllvm -enable-emscripten-cxx-exceptions'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:     --sysroot=/foo %s -mllvm -emscripten-cxx-exceptions-allowed 2>&1 \
 // RUN:   | FileCheck -check-prefix=EMSCRIPTEN_EH_ALLOWED_WO_ENABLE %s
 // EMSCRIPTEN_EH_ALLOWED_WO_ENABLE: invalid argument '-mllvm -emscripten-cxx-exceptions-allowed' only allowed with '-mllvm -enable-emscripten-cxx-exceptions'
 
 // '-fwasm-exceptions' sets +exception-handling and '-mllvm -wasm-enable-eh'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:    --sysroot=/foo %s -fwasm-exceptions 2>&1 \
 // RUN:  | FileCheck -check-prefix=WASM_EXCEPTIONS %s
-// WASM_EXCEPTIONS: clang{{.*}}" "-cc1" {{.*}} "-target-feature" "+exception-handling" "-mllvm" "-wasm-enable-eh"
+// WASM_EXCEPTIONS: "-cc1" {{.*}} "-target-feature" "+exception-handling" "-mllvm" "-wasm-enable-eh"
 
 // '-fwasm-exceptions' not allowed with '-mno-exception-handling'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:     --sysroot=/foo %s -fwasm-exceptions -mno-exception-handling 2>&1 \
 // RUN:   | FileCheck -check-prefix=WASM_EXCEPTIONS_NO_EH %s
 // WASM_EXCEPTIONS_NO_EH: invalid argument '-fwasm-exceptions' not allowed with '-mno-exception-handling'
 
 // '-fwasm-exceptions' not allowed with '-mllvm -enable-emscripten-cxx-exceptions'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:     --sysroot=/foo %s -fwasm-exceptions \
 // RUN:     -mllvm -enable-emscripten-cxx-exceptions 2>&1 \
 // RUN:   | FileCheck -check-prefix=WASM_EXCEPTIONS_EMSCRIPTEN_EH %s
@@ -115,13 +115,13 @@
 
 // '-mllvm -wasm-enable-sjlj' sets +exception-handling and
 // '-exception-model=wasm'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:    --sysroot=/foo %s -mllvm -wasm-enable-sjlj 2>&1 \
 // RUN:  | FileCheck -check-prefix=WASM_SJLJ %s
-// WASM_SJLJ: clang{{.*}}" "-cc1" {{.*}} "-target-feature" "+exception-handling" "-exception-model=wasm"
+// WASM_SJLJ: "-cc1" {{.*}} "-target-feature" "+exception-handling" "-exception-model=wasm"
 
 // '-mllvm -wasm-enable-sjlj' not allowed with '-mno-exception-handling'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:     --sysroot=/foo %s -mllvm -wasm-enable-sjlj -mno-exception-handling \
 // RUN:     2>&1 \
 // RUN:   | FileCheck -check-prefix=WASM_SJLJ_NO_EH %s
@@ -129,42 +129,42 @@
 
 // '-mllvm -wasm-enable-sjlj' not allowed with
 // '-mllvm -enable-emscripten-cxx-exceptions'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:     --sysroot=/foo %s -mllvm -wasm-enable-sjlj \
 // RUN:     -mllvm -enable-emscripten-cxx-exceptions 2>&1 \
 // RUN:   | FileCheck -check-prefix=WASM_SJLJ_EMSCRIPTEN_EH %s
 // WASM_SJLJ_EMSCRIPTEN_EH: invalid argument '-mllvm -wasm-enable-sjlj' not allowed with '-mllvm -enable-emscripten-cxx-exceptions'
 
 // '-mllvm -wasm-enable-sjlj' not allowed with '-mllvm -enable-emscripten-sjlj'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown \
+// RUN: %clang -### --target=wasm32-unknown-unknown \
 // RUN:     --sysroot=/foo %s -mllvm -wasm-enable-sjlj \
 // RUN:     -mllvm -enable-emscripten-sjlj 2>&1 \
 // RUN:   | FileCheck -check-prefix=WASM_SJLJ_EMSCRIPTEN_SJLJ %s
 // WASM_SJLJ_EMSCRIPTEN_SJLJ: invalid argument '-mllvm -wasm-enable-sjlj' not allowed with '-mllvm -enable-emscripten-sjlj'
 
-// RUN: %clang %s -### -fsanitize=address -target wasm32-unknown-emscripten 2>&1 | FileCheck -check-prefix=CHECK-ASAN-EMSCRIPTEN %s
+// RUN: %clang -### %s -fsanitize=address --target=wasm32-unknown-emscripten 2>&1 | FileCheck -check-prefix=CHECK-ASAN-EMSCRIPTEN %s
 // CHECK-ASAN-EMSCRIPTEN: "-fsanitize=address"
 // CHECK-ASAN-EMSCRIPTEN: "-fsanitize-address-globals-dead-stripping"
 
 // Basic exec-model tests.
 
-// RUN: %clang %s -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -mexec-model=command 2>&1 \
+// RUN: %clang -### %s --target=wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -mexec-model=command 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-COMMAND %s
-// CHECK-COMMAND: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// CHECK-COMMAND: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // CHECK-COMMAND: wasm-ld{{.*}}" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
-// RUN: %clang %s -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -mexec-model=reactor 2>&1 \
+// RUN: %clang -### %s --target=wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -mexec-model=reactor 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-REACTOR %s
-// CHECK-REACTOR: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// CHECK-REACTOR: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // CHECK-REACTOR: wasm-ld{{.*}}" "crt1-reactor.o" "--entry" "_initialize" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // -fPIC implies +mutable-globals
 
-// RUN: %clang %s -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -fPIC 2>&1 \
+// RUN: %clang -### %s --target=wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -fPIC 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-PIC %s
-// CHECK-PIC: clang{{.*}}" "-cc1" {{.*}} "-target-feature" "+mutable-globals"
+// CHECK-PIC: "-cc1" {{.*}} "-target-feature" "+mutable-globals"
 
 // '-mno-mutable-globals' is not allowed with '-fPIC'
-// RUN: %clang %s -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -fPIC -mno-mutable-globals %s 2>&1 \
+// RUN: %clang -### %s --target=wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -fPIC -mno-mutable-globals %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=PIC_NO_MUTABLE_GLOBALS %s
 // PIC_NO_MUTABLE_GLOBALS: error: invalid argument '-fPIC' not allowed with '-mno-mutable-globals'
diff --git a/clang/test/Driver/wasm-toolchain.cpp b/clang/test/Driver/wasm-toolchain.cpp
index 3ff6a2c..86a223a 100644
--- a/clang/test/Driver/wasm-toolchain.cpp
+++ b/clang/test/Driver/wasm-toolchain.cpp
@@ -1,71 +1,71 @@
 // A basic clang -cc1 command-line. WebAssembly is somewhat special in
 // enabling -fvisibility=hidden by default.
 
-// RUN: %clangxx %s -### -no-canonical-prefixes -target wasm32-unknown-unknown 2>&1 \
+// RUN: %clangxx -### %s --target=wasm32-unknown-unknown 2>&1 \
 // RUN:   | FileCheck -check-prefix=CC1 %s
-// CC1: clang{{.*}} "-cc1" "-triple" "wasm32-unknown-unknown" {{.*}} "-fvisibility" "hidden" {{.*}}
+// CC1: "-cc1" "-triple" "wasm32-unknown-unknown" {{.*}} "-fvisibility" "hidden" {{.*}}
 
 // Ditto, but ensure that a user -fvisibility=default disables the default
 // -fvisibility=hidden.
 
-// RUN: %clangxx %s -### -target wasm32-unknown-unknown -fvisibility=default 2>&1 \
+// RUN: %clangxx -### %s --target=wasm32-unknown-unknown -fvisibility=default 2>&1 \
 // RUN:   | FileCheck -check-prefix=FVISIBILITY_DEFAULT %s
 // FVISIBILITY_DEFAULT-NOT: hidden
 
 // A basic C++ link command-line with unknown OS.
 
-// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo --stdlib=libc++ %s 2>&1 \
+// RUN: %clangxx -### --target=wasm32-unknown-unknown --sysroot=/foo --stdlib=libc++ %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK %s
-// LINK: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
-// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo --stdlib=libstdc++ %s 2>&1 \
+// RUN: %clangxx -### --target=wasm32-unknown-unknown --sysroot=/foo --stdlib=libstdc++ %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_STDCXX %s
-// LINK_STDCXX: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK_STDCXX: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_STDCXX: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C++ link command-line with optimization with unknown OS.
 
-// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s --stdlib=libc++ 2>&1 \
+// RUN: %clangxx -### -O2 --target=wasm32-unknown-unknown --sysroot=/foo %s --stdlib=libc++ 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_OPT %s
-// LINK_OPT: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK_OPT: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_OPT: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
-// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s --stdlib=libstdc++ 2>&1 \
+// RUN: %clangxx -### -O2 --target=wasm32-unknown-unknown --sysroot=/foo %s --stdlib=libstdc++ 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_OPT_STDCXX %s
-// LINK_OPT_STDCXX: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK_OPT_STDCXX: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_OPT_STDCXX: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C++ link command-line with known OS.
 
-// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo --stdlib=libc++ %s 2>&1 \
+// RUN: %clangxx -### --target=wasm32-wasi --sysroot=/foo --stdlib=libc++ %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_KNOWN %s
-// LINK_KNOWN: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK_KNOWN: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
-// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo --stdlib=libstdc++ %s 2>&1 \
+// RUN: %clangxx -### --target=wasm32-wasi --sysroot=/foo --stdlib=libstdc++ %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_KNOWN_STDCXX %s
-// LINK_KNOWN_STDCXX: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK_KNOWN_STDCXX: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_KNOWN_STDCXX: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C++ link command-line with optimization with known OS.
 
-// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo %s --stdlib=libc++ 2>&1 \
+// RUN: %clangxx -### -O2 --target=wasm32-wasi --sysroot=/foo %s --stdlib=libc++ 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_OPT_KNOWN %s
-// LINK_OPT_KNOWN: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK_OPT_KNOWN: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_OPT_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
-// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo %s --stdlib=libstdc++ 2>&1 \
+// RUN: %clangxx -### -O2 --target=wasm32-wasi --sysroot=/foo %s --stdlib=libstdc++ 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_OPT_KNOWN_STDCXX %s
-// LINK_OPT_KNOWN_STDCXX: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK_OPT_KNOWN_STDCXX: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_OPT_KNOWN_STDCXX: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C++ compile command-line with known OS.
 
-// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-wasi --stdlib=libc++ %s 2>&1 \
+// RUN: %clangxx -### --target=wasm32-wasi --stdlib=libc++ %s 2>&1 \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libcxx_tree/usr \
 // RUN:   | FileCheck -check-prefix=COMPILE %s
-// COMPILE: clang{{.*}}" "-cc1"
+// COMPILE: "-cc1"
 // COMPILE: "-resource-dir" "[[RESOURCE_DIR:[^"]*]]"
 // COMPILE: "-isysroot" "[[SYSROOT:[^"]+]]"
 // COMPILE: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/wasm32-wasi/c++/v1"
@@ -74,10 +74,10 @@
 // COMPILE: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/wasm32-wasi"
 // COMPILE: "-internal-isystem" "[[SYSROOT:[^"]+]]/include"
 
-// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-wasi --stdlib=libstdc++ %s 2>&1 \
+// RUN: %clangxx -### --target=wasm32-wasi --stdlib=libstdc++ %s 2>&1 \
 // RUN:     --sysroot=%S/Inputs/basic_linux_libstdcxx_libcxxv2_tree/usr \
 // RUN:   | FileCheck -check-prefix=COMPILE_STDCXX %s
-// COMPILE_STDCXX: clang{{.*}}" "-cc1"
+// COMPILE_STDCXX: "-cc1"
 // COMPILE_STDCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]*]]"
 // COMPILE_STDCXX: "-isysroot" "[[SYSROOT:[^"]+]]"
 // COMPILE_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/c++/4.8/wasm32-wasi"
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index b240eb1..7c7e7e3 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -5,11 +5,11 @@
 
 // RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64
 // AARCH64: error: unknown target CPU 'not-a-cpu'
-// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel{{$}}
+// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
 
 // RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64
 // TUNE_AARCH64: error: unknown target CPU 'not-a-cpu'
-// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel{{$}}
+// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
 
 // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
 // X86: error: unknown target CPU 'not-a-cpu'
diff --git a/clang/test/OpenMP/atomic_capture_codegen.cpp b/clang/test/OpenMP/atomic_capture_codegen.cpp
index c5f45a3..95509df9 100644
--- a/clang/test/OpenMP/atomic_capture_codegen.cpp
+++ b/clang/test/OpenMP/atomic_capture_codegen.cpp
@@ -216,20 +216,8 @@ int main(void) {
 #pragma omp atomic capture
   llv = ullx |= ullv;
 // CHECK: [[EXPR:%.+]] = load float, float* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i32, i32*  bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic, align 4
-// CHECK: br label %[[CONT:.+]]
-// CHECK: [[CONT]]
-// CHECK: [[EXPECTED:%.+]] = phi i32 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
-// CHECK: [[TEMP_I:%.+]] = bitcast float* [[TEMP:%.+]] to i32*
-// CHECK: [[OLD:%.+]] = bitcast i32 [[EXPECTED]] to float
+// CHECK: [[OLD:%.+]] = atomicrmw fadd float* @{{.+}}, float [[EXPR]] monotonic, align 4
 // CHECK: [[ADD:%.+]] = fadd float [[OLD]], [[EXPR]]
-// CHECK: store float [[ADD]], float* [[TEMP]],
-// CHECK: [[DESIRED:%.+]] = load i32, i32* [[TEMP_I]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
-// CHECK: [[OLD_X:%.+]] = extractvalue { i32, i1 } [[RES]], 0
-// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
-// CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
-// CHECK: [[EXIT]]
 // CHECK: [[CAST:%.+]] = fpext float [[ADD]] to double
 // CHECK: store double [[CAST]], double* @{{.+}},
 #pragma omp atomic capture
diff --git a/clang/test/OpenMP/atomic_update_codegen.cpp b/clang/test/OpenMP/atomic_update_codegen.cpp
index 398c2fd..4cc301b 100644
--- a/clang/test/OpenMP/atomic_update_codegen.cpp
+++ b/clang/test/OpenMP/atomic_update_codegen.cpp
@@ -82,7 +82,7 @@ float2 float2x;
 register int rix __asm__("esp");
 
 int main(void) {
-// CHECK-NOT: atomicrmw
+// CHECK: atomicrmw fadd double* @{{.+}}, double 1.000000e+00 monotonic, align 8
 #pragma omp atomic
   ++dv;
 // CHECK: atomicrmw add i8* @{{.+}}, i8 1 monotonic, align 1
@@ -192,20 +192,7 @@ int main(void) {
 #pragma omp atomic
   ullx |= ullv;
 // CHECK: [[EXPR:%.+]] = load float, float* @{{.+}},
-// CHECK: [[OLD:%.+]] = load atomic i32, i32*  bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic, align 4
-// CHECK: br label %[[CONT:.+]]
-// CHECK: [[CONT]]
-// CHECK: [[EXPECTED:%.+]] = phi i32 [ [[OLD]], %{{.+}} ], [ [[PREV:%.+]], %[[CONT]] ]
-// CHECK: [[BITCAST:%.+]] = bitcast float* [[TEMP:%.+]] to i32*
-// CHECK: [[OLD:%.+]] = bitcast i32 [[EXPECTED]] to float
-// CHECK: [[ADD:%.+]] = fadd float [[OLD]], [[EXPR]]
-// CHECK: store float [[ADD]], float* [[TEMP]],
-// CHECK: [[DESIRED:%.+]] = load i32, i32* [[BITCAST]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
-// CHECK: [[PREV:%.+]] = extractvalue { i32, i1 } [[RES]], 0
-// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
-// CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
-// CHECK: [[EXIT]]
+// CHECK: atomicrmw fadd float* @{{.+}}, float [[EXPR]] monotonic, align 4
 #pragma omp atomic update
   fx = fx + fv;
 // CHECK: [[EXPR:%.+]] = load double, double* @{{.+}},
diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp
index 4820be8..0fa00b3 100644
--- a/clang/test/OpenMP/for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/for_reduction_codegen.cpp
@@ -698,11 +698,9 @@ int main() {
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8
 // CHECK1-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK1-NEXT:    [[REF_TMP23:%.*]] = alloca [[STRUCT_S]], align 4
 // CHECK1-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca float, align 4
-// CHECK1-NEXT:    [[_TMP22:%.*]] = alloca float, align 4
-// CHECK1-NEXT:    [[REF_TMP25:%.*]] = alloca [[STRUCT_S]], align 4
-// CHECK1-NEXT:    [[ATOMIC_TEMP35:%.*]] = alloca float, align 4
-// CHECK1-NEXT:    [[_TMP36:%.*]] = alloca float, align 4
+// CHECK1-NEXT:    [[_TMP31:%.*]] = alloca float, align 4
 // CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT:    store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8
@@ -843,77 +841,59 @@ int main() {
 // CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK1:       .omp.reduction.case2:
 // CHECK1-NEXT:    [[TMP44:%.*]] = load float, float* [[T_VAR3]], align 4
-// CHECK1-NEXT:    [[TMP45:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK1-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP45]] monotonic, align 4
-// CHECK1-NEXT:    br label [[ATOMIC_CONT:%.*]]
-// CHECK1:       atomic_cont:
-// CHECK1-NEXT:    [[TMP46:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP54:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT:    [[TMP47:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
-// CHECK1-NEXT:    [[TMP48:%.*]] = bitcast i32 [[TMP46]] to float
-// CHECK1-NEXT:    store float [[TMP48]], float* [[_TMP22]], align 4
-// CHECK1-NEXT:    [[TMP49:%.*]] = load float, float* [[_TMP22]], align 4
-// CHECK1-NEXT:    [[TMP50:%.*]] = load float, float* [[T_VAR3]], align 4
-// CHECK1-NEXT:    [[ADD23:%.*]] = fadd float [[TMP49]], [[TMP50]]
-// CHECK1-NEXT:    store float [[ADD23]], float* [[ATOMIC_TEMP]], align 4
-// CHECK1-NEXT:    [[TMP51:%.*]] = load i32, i32* [[TMP47]], align 4
-// CHECK1-NEXT:    [[TMP52:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK1-NEXT:    [[TMP53:%.*]] = cmpxchg i32* [[TMP52]], i32 [[TMP46]], i32 [[TMP51]] monotonic monotonic, align 4
-// CHECK1-NEXT:    [[TMP54]] = extractvalue { i32, i1 } [[TMP53]], 0
-// CHECK1-NEXT:    [[TMP55:%.*]] = extractvalue { i32, i1 } [[TMP53]], 1
-// CHECK1-NEXT:    br i1 [[TMP55]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK1:       atomic_exit:
+// CHECK1-NEXT:    [[TMP45:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP44]] monotonic, align 4
 // CHECK1-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT:    [[CALL24:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]])
-// CHECK1-NEXT:    [[TMP56:%.*]] = bitcast %struct.S* [[TMP7]] to i8*
-// CHECK1-NEXT:    [[TMP57:%.*]] = bitcast %struct.S* [[CALL24]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP56]], i8* align 4 [[TMP57]], i64 4, i1 false)
+// CHECK1-NEXT:    [[CALL22:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]])
+// CHECK1-NEXT:    [[TMP46:%.*]] = bitcast %struct.S* [[TMP7]] to i8*
+// CHECK1-NEXT:    [[TMP47:%.*]] = bitcast %struct.S* [[CALL22]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 4, i1 false)
 // CHECK1-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
 // CHECK1-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT:    [[CALL26:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
-// CHECK1-NEXT:    [[TOBOOL27:%.*]] = fcmp une float [[CALL26]], 0.000000e+00
-// CHECK1-NEXT:    br i1 [[TOBOOL27]], label [[LAND_RHS28:%.*]], label [[LAND_END31:%.*]]
-// CHECK1:       land.rhs28:
-// CHECK1-NEXT:    [[CALL29:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]])
-// CHECK1-NEXT:    [[TOBOOL30:%.*]] = fcmp une float [[CALL29]], 0.000000e+00
-// CHECK1-NEXT:    br label [[LAND_END31]]
-// CHECK1:       land.end31:
-// CHECK1-NEXT:    [[TMP58:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL30]], [[LAND_RHS28]] ]
-// CHECK1-NEXT:    [[CONV32:%.*]] = uitofp i1 [[TMP58]] to float
-// CHECK1-NEXT:    call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP25]], float noundef [[CONV32]])
-// CHECK1-NEXT:    [[TMP59:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
-// CHECK1-NEXT:    [[TMP60:%.*]] = bitcast %struct.S* [[REF_TMP25]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP59]], i8* align 4 [[TMP60]], i64 4, i1 false)
-// CHECK1-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP25]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[CALL24:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
+// CHECK1-NEXT:    [[TOBOOL25:%.*]] = fcmp une float [[CALL24]], 0.000000e+00
+// CHECK1-NEXT:    br i1 [[TOBOOL25]], label [[LAND_RHS26:%.*]], label [[LAND_END29:%.*]]
+// CHECK1:       land.rhs26:
+// CHECK1-NEXT:    [[CALL27:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]])
+// CHECK1-NEXT:    [[TOBOOL28:%.*]] = fcmp une float [[CALL27]], 0.000000e+00
+// CHECK1-NEXT:    br label [[LAND_END29]]
+// CHECK1:       land.end29:
+// CHECK1-NEXT:    [[TMP48:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL28]], [[LAND_RHS26]] ]
+// CHECK1-NEXT:    [[CONV30:%.*]] = uitofp i1 [[TMP48]] to float
+// CHECK1-NEXT:    call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]], float noundef [[CONV30]])
+// CHECK1-NEXT:    [[TMP49:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
+// CHECK1-NEXT:    [[TMP50:%.*]] = bitcast %struct.S* [[REF_TMP23]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP49]], i8* align 4 [[TMP50]], i64 4, i1 false)
+// CHECK1-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]]) #[[ATTR5]]
 // CHECK1-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT:    [[TMP61:%.*]] = load float, float* [[T_VAR17]], align 4
-// CHECK1-NEXT:    [[TMP62:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK1-NEXT:    [[ATOMIC_LOAD33:%.*]] = load atomic i32, i32* [[TMP62]] monotonic, align 4
-// CHECK1-NEXT:    br label [[ATOMIC_CONT34:%.*]]
-// CHECK1:       atomic_cont34:
-// CHECK1-NEXT:    [[TMP63:%.*]] = phi i32 [ [[ATOMIC_LOAD33]], [[LAND_END31]] ], [ [[TMP73:%.*]], [[COND_END40:%.*]] ]
-// CHECK1-NEXT:    [[TMP64:%.*]] = bitcast float* [[ATOMIC_TEMP35]] to i32*
-// CHECK1-NEXT:    [[TMP65:%.*]] = bitcast i32 [[TMP63]] to float
-// CHECK1-NEXT:    store float [[TMP65]], float* [[_TMP36]], align 4
-// CHECK1-NEXT:    [[TMP66:%.*]] = load float, float* [[_TMP36]], align 4
-// CHECK1-NEXT:    [[TMP67:%.*]] = load float, float* [[T_VAR17]], align 4
-// CHECK1-NEXT:    [[CMP37:%.*]] = fcmp olt float [[TMP66]], [[TMP67]]
-// CHECK1-NEXT:    br i1 [[CMP37]], label [[COND_TRUE38:%.*]], label [[COND_FALSE39:%.*]]
-// CHECK1:       cond.true38:
-// CHECK1-NEXT:    [[TMP68:%.*]] = load float, float* [[_TMP36]], align 4
-// CHECK1-NEXT:    br label [[COND_END40]]
-// CHECK1:       cond.false39:
-// CHECK1-NEXT:    [[TMP69:%.*]] = load float, float* [[T_VAR17]], align 4
-// CHECK1-NEXT:    br label [[COND_END40]]
-// CHECK1:       cond.end40:
-// CHECK1-NEXT:    [[COND41:%.*]] = phi float [ [[TMP68]], [[COND_TRUE38]] ], [ [[TMP69]], [[COND_FALSE39]] ]
-// CHECK1-NEXT:    store float [[COND41]], float* [[ATOMIC_TEMP35]], align 4
-// CHECK1-NEXT:    [[TMP70:%.*]] = load i32, i32* [[TMP64]], align 4
-// CHECK1-NEXT:    [[TMP71:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK1-NEXT:    [[TMP72:%.*]] = cmpxchg i32* [[TMP71]], i32 [[TMP63]], i32 [[TMP70]] monotonic monotonic, align 4
-// CHECK1-NEXT:    [[TMP73]] = extractvalue { i32, i1 } [[TMP72]], 0
-// CHECK1-NEXT:    [[TMP74:%.*]] = extractvalue { i32, i1 } [[TMP72]], 1
-// CHECK1-NEXT:    br i1 [[TMP74]], label [[ATOMIC_EXIT42:%.*]], label [[ATOMIC_CONT34]]
-// CHECK1:       atomic_exit42:
+// CHECK1-NEXT:    [[TMP51:%.*]] = load float, float* [[T_VAR17]], align 4
+// CHECK1-NEXT:    [[TMP52:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK1-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP52]] monotonic, align 4
+// CHECK1-NEXT:    br label [[ATOMIC_CONT:%.*]]
+// CHECK1:       atomic_cont:
+// CHECK1-NEXT:    [[TMP53:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END29]] ], [ [[TMP63:%.*]], [[COND_END35:%.*]] ]
+// CHECK1-NEXT:    [[TMP54:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
+// CHECK1-NEXT:    [[TMP55:%.*]] = bitcast i32 [[TMP53]] to float
+// CHECK1-NEXT:    store float [[TMP55]], float* [[_TMP31]], align 4
+// CHECK1-NEXT:    [[TMP56:%.*]] = load float, float* [[_TMP31]], align 4
+// CHECK1-NEXT:    [[TMP57:%.*]] = load float, float* [[T_VAR17]], align 4
+// CHECK1-NEXT:    [[CMP32:%.*]] = fcmp olt float [[TMP56]], [[TMP57]]
+// CHECK1-NEXT:    br i1 [[CMP32]], label [[COND_TRUE33:%.*]], label [[COND_FALSE34:%.*]]
+// CHECK1:       cond.true33:
+// CHECK1-NEXT:    [[TMP58:%.*]] = load float, float* [[_TMP31]], align 4
+// CHECK1-NEXT:    br label [[COND_END35]]
+// CHECK1:       cond.false34:
+// CHECK1-NEXT:    [[TMP59:%.*]] = load float, float* [[T_VAR17]], align 4
+// CHECK1-NEXT:    br label [[COND_END35]]
+// CHECK1:       cond.end35:
+// CHECK1-NEXT:    [[COND36:%.*]] = phi float [ [[TMP58]], [[COND_TRUE33]] ], [ [[TMP59]], [[COND_FALSE34]] ]
+// CHECK1-NEXT:    store float [[COND36]], float* [[ATOMIC_TEMP]], align 4
+// CHECK1-NEXT:    [[TMP60:%.*]] = load i32, i32* [[TMP54]], align 4
+// CHECK1-NEXT:    [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK1-NEXT:    [[TMP62:%.*]] = cmpxchg i32* [[TMP61]], i32 [[TMP53]], i32 [[TMP60]] monotonic monotonic, align 4
+// CHECK1-NEXT:    [[TMP63]] = extractvalue { i32, i1 } [[TMP62]], 0
+// CHECK1-NEXT:    [[TMP64:%.*]] = extractvalue { i32, i1 } [[TMP62]], 1
+// CHECK1-NEXT:    br i1 [[TMP64]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
+// CHECK1:       atomic_exit:
 // CHECK1-NEXT:    call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var)
 // CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK1:       .omp.reduction.default:
@@ -4633,11 +4613,9 @@ int main() {
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8
 // CHECK2-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK2-NEXT:    [[REF_TMP23:%.*]] = alloca [[STRUCT_S]], align 4
 // CHECK2-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca float, align 4
-// CHECK2-NEXT:    [[_TMP22:%.*]] = alloca float, align 4
-// CHECK2-NEXT:    [[REF_TMP25:%.*]] = alloca [[STRUCT_S]], align 4
-// CHECK2-NEXT:    [[ATOMIC_TEMP35:%.*]] = alloca float, align 4
-// CHECK2-NEXT:    [[_TMP36:%.*]] = alloca float, align 4
+// CHECK2-NEXT:    [[_TMP31:%.*]] = alloca float, align 4
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK2-NEXT:    store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8
@@ -4778,77 +4756,59 @@ int main() {
 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK2:       .omp.reduction.case2:
 // CHECK2-NEXT:    [[TMP44:%.*]] = load float, float* [[T_VAR3]], align 4
-// CHECK2-NEXT:    [[TMP45:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK2-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP45]] monotonic, align 4
-// CHECK2-NEXT:    br label [[ATOMIC_CONT:%.*]]
-// CHECK2:       atomic_cont:
-// CHECK2-NEXT:    [[TMP46:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP54:%.*]], [[ATOMIC_CONT]] ]
-// CHECK2-NEXT:    [[TMP47:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
-// CHECK2-NEXT:    [[TMP48:%.*]] = bitcast i32 [[TMP46]] to float
-// CHECK2-NEXT:    store float [[TMP48]], float* [[_TMP22]], align 4
-// CHECK2-NEXT:    [[TMP49:%.*]] = load float, float* [[_TMP22]], align 4
-// CHECK2-NEXT:    [[TMP50:%.*]] = load float, float* [[T_VAR3]], align 4
-// CHECK2-NEXT:    [[ADD23:%.*]] = fadd float [[TMP49]], [[TMP50]]
-// CHECK2-NEXT:    store float [[ADD23]], float* [[ATOMIC_TEMP]], align 4
-// CHECK2-NEXT:    [[TMP51:%.*]] = load i32, i32* [[TMP47]], align 4
-// CHECK2-NEXT:    [[TMP52:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK2-NEXT:    [[TMP53:%.*]] = cmpxchg i32* [[TMP52]], i32 [[TMP46]], i32 [[TMP51]] monotonic monotonic, align 4
-// CHECK2-NEXT:    [[TMP54]] = extractvalue { i32, i1 } [[TMP53]], 0
-// CHECK2-NEXT:    [[TMP55:%.*]] = extractvalue { i32, i1 } [[TMP53]], 1
-// CHECK2-NEXT:    br i1 [[TMP55]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK2:       atomic_exit:
+// CHECK2-NEXT:    [[TMP45:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP44]] monotonic, align 4
 // CHECK2-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT:    [[CALL24:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]])
-// CHECK2-NEXT:    [[TMP56:%.*]] = bitcast %struct.S* [[TMP7]] to i8*
-// CHECK2-NEXT:    [[TMP57:%.*]] = bitcast %struct.S* [[CALL24]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP56]], i8* align 4 [[TMP57]], i64 4, i1 false)
+// CHECK2-NEXT:    [[CALL22:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]])
+// CHECK2-NEXT:    [[TMP46:%.*]] = bitcast %struct.S* [[TMP7]] to i8*
+// CHECK2-NEXT:    [[TMP47:%.*]] = bitcast %struct.S* [[CALL22]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 4, i1 false)
 // CHECK2-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
 // CHECK2-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT:    [[CALL26:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
-// CHECK2-NEXT:    [[TOBOOL27:%.*]] = fcmp une float [[CALL26]], 0.000000e+00
-// CHECK2-NEXT:    br i1 [[TOBOOL27]], label [[LAND_RHS28:%.*]], label [[LAND_END31:%.*]]
-// CHECK2:       land.rhs28:
-// CHECK2-NEXT:    [[CALL29:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]])
-// CHECK2-NEXT:    [[TOBOOL30:%.*]] = fcmp une float [[CALL29]], 0.000000e+00
-// CHECK2-NEXT:    br label [[LAND_END31]]
-// CHECK2:       land.end31:
-// CHECK2-NEXT:    [[TMP58:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL30]], [[LAND_RHS28]] ]
-// CHECK2-NEXT:    [[CONV32:%.*]] = uitofp i1 [[TMP58]] to float
-// CHECK2-NEXT:    call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP25]], float noundef [[CONV32]])
-// CHECK2-NEXT:    [[TMP59:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
-// CHECK2-NEXT:    [[TMP60:%.*]] = bitcast %struct.S* [[REF_TMP25]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP59]], i8* align 4 [[TMP60]], i64 4, i1 false)
-// CHECK2-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP25]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[CALL24:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
+// CHECK2-NEXT:    [[TOBOOL25:%.*]] = fcmp une float [[CALL24]], 0.000000e+00
+// CHECK2-NEXT:    br i1 [[TOBOOL25]], label [[LAND_RHS26:%.*]], label [[LAND_END29:%.*]]
+// CHECK2:       land.rhs26:
+// CHECK2-NEXT:    [[CALL27:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]])
+// CHECK2-NEXT:    [[TOBOOL28:%.*]] = fcmp une float [[CALL27]], 0.000000e+00
+// CHECK2-NEXT:    br label [[LAND_END29]]
+// CHECK2:       land.end29:
+// CHECK2-NEXT:    [[TMP48:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL28]], [[LAND_RHS26]] ]
+// CHECK2-NEXT:    [[CONV30:%.*]] = uitofp i1 [[TMP48]] to float
+// CHECK2-NEXT:    call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]], float noundef [[CONV30]])
+// CHECK2-NEXT:    [[TMP49:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
+// CHECK2-NEXT:    [[TMP50:%.*]] = bitcast %struct.S* [[REF_TMP23]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP49]], i8* align 4 [[TMP50]], i64 4, i1 false)
+// CHECK2-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]]) #[[ATTR5]]
 // CHECK2-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT:    [[TMP61:%.*]] = load float, float* [[T_VAR17]], align 4
-// CHECK2-NEXT:    [[TMP62:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK2-NEXT:    [[ATOMIC_LOAD33:%.*]] = load atomic i32, i32* [[TMP62]] monotonic, align 4
-// CHECK2-NEXT:    br label [[ATOMIC_CONT34:%.*]]
-// CHECK2:       atomic_cont34:
-// CHECK2-NEXT:    [[TMP63:%.*]] = phi i32 [ [[ATOMIC_LOAD33]], [[LAND_END31]] ], [ [[TMP73:%.*]], [[COND_END40:%.*]] ]
-// CHECK2-NEXT:    [[TMP64:%.*]] = bitcast float* [[ATOMIC_TEMP35]] to i32*
-// CHECK2-NEXT:    [[TMP65:%.*]] = bitcast i32 [[TMP63]] to float
-// CHECK2-NEXT:    store float [[TMP65]], float* [[_TMP36]], align 4
-// CHECK2-NEXT:    [[TMP66:%.*]] = load float, float* [[_TMP36]], align 4
-// CHECK2-NEXT:    [[TMP67:%.*]] = load float, float* [[T_VAR17]], align 4
-// CHECK2-NEXT:    [[CMP37:%.*]] = fcmp olt float [[TMP66]], [[TMP67]]
-// CHECK2-NEXT:    br i1 [[CMP37]], label [[COND_TRUE38:%.*]], label [[COND_FALSE39:%.*]]
-// CHECK2:       cond.true38:
-// CHECK2-NEXT:    [[TMP68:%.*]] = load float, float* [[_TMP36]], align 4
-// CHECK2-NEXT:    br label [[COND_END40]]
-// CHECK2:       cond.false39:
-// CHECK2-NEXT:    [[TMP69:%.*]] = load float, float* [[T_VAR17]], align 4
-// CHECK2-NEXT:    br label [[COND_END40]]
-// CHECK2:       cond.end40:
-// CHECK2-NEXT:    [[COND41:%.*]] = phi float [ [[TMP68]], [[COND_TRUE38]] ], [ [[TMP69]], [[COND_FALSE39]] ]
-// CHECK2-NEXT:    store float [[COND41]], float* [[ATOMIC_TEMP35]], align 4
-// CHECK2-NEXT:    [[TMP70:%.*]] = load i32, i32* [[TMP64]], align 4
-// CHECK2-NEXT:    [[TMP71:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK2-NEXT:    [[TMP72:%.*]] = cmpxchg i32* [[TMP71]], i32 [[TMP63]], i32 [[TMP70]] monotonic monotonic, align 4
-// CHECK2-NEXT:    [[TMP73]] = extractvalue { i32, i1 } [[TMP72]], 0
-// CHECK2-NEXT:    [[TMP74:%.*]] = extractvalue { i32, i1 } [[TMP72]], 1
-// CHECK2-NEXT:    br i1 [[TMP74]], label [[ATOMIC_EXIT42:%.*]], label [[ATOMIC_CONT34]]
-// CHECK2:       atomic_exit42:
+// CHECK2-NEXT:    [[TMP51:%.*]] = load float, float* [[T_VAR17]], align 4
+// CHECK2-NEXT:    [[TMP52:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK2-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP52]] monotonic, align 4
+// CHECK2-NEXT:    br label [[ATOMIC_CONT:%.*]]
+// CHECK2:       atomic_cont:
+// CHECK2-NEXT:    [[TMP53:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END29]] ], [ [[TMP63:%.*]], [[COND_END35:%.*]] ]
+// CHECK2-NEXT:    [[TMP54:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
+// CHECK2-NEXT:    [[TMP55:%.*]] = bitcast i32 [[TMP53]] to float
+// CHECK2-NEXT:    store float [[TMP55]], float* [[_TMP31]], align 4
+// CHECK2-NEXT:    [[TMP56:%.*]] = load float, float* [[_TMP31]], align 4
+// CHECK2-NEXT:    [[TMP57:%.*]] = load float, float* [[T_VAR17]], align 4
+// CHECK2-NEXT:    [[CMP32:%.*]] = fcmp olt float [[TMP56]], [[TMP57]]
+// CHECK2-NEXT:    br i1 [[CMP32]], label [[COND_TRUE33:%.*]], label [[COND_FALSE34:%.*]]
+// CHECK2:       cond.true33:
+// CHECK2-NEXT:    [[TMP58:%.*]] = load float, float* [[_TMP31]], align 4
+// CHECK2-NEXT:    br label [[COND_END35]]
+// CHECK2:       cond.false34:
+// CHECK2-NEXT:    [[TMP59:%.*]] = load float, float* [[T_VAR17]], align 4
+// CHECK2-NEXT:    br label [[COND_END35]]
+// CHECK2:       cond.end35:
+// CHECK2-NEXT:    [[COND36:%.*]] = phi float [ [[TMP58]], [[COND_TRUE33]] ], [ [[TMP59]], [[COND_FALSE34]] ]
+// CHECK2-NEXT:    store float [[COND36]], float* [[ATOMIC_TEMP]], align 4
+// CHECK2-NEXT:    [[TMP60:%.*]] = load i32, i32* [[TMP54]], align 4
+// CHECK2-NEXT:    [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK2-NEXT:    [[TMP62:%.*]] = cmpxchg i32* [[TMP61]], i32 [[TMP53]], i32 [[TMP60]] monotonic monotonic, align 4
+// CHECK2-NEXT:    [[TMP63]] = extractvalue { i32, i1 } [[TMP62]], 0
+// CHECK2-NEXT:    [[TMP64:%.*]] = extractvalue { i32, i1 } [[TMP62]], 1
+// CHECK2-NEXT:    br i1 [[TMP64]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
+// CHECK2:       atomic_exit:
 // CHECK2-NEXT:    call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var)
 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK2:       .omp.reduction.default:
@@ -8425,10 +8385,6 @@ int main() {
 // CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8
-// CHECK3-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca double, align 8
-// CHECK3-NEXT:    [[_TMP7:%.*]] = alloca double, align 8
-// CHECK3-NEXT:    [[ATOMIC_TEMP11:%.*]] = alloca double, align 8
-// CHECK3-NEXT:    [[_TMP12:%.*]] = alloca double, align 8
 // CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load double*, double** @g1, align 8
@@ -8514,43 +8470,9 @@ int main() {
 // CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK3:       .omp.reduction.case2:
 // CHECK3-NEXT:    [[TMP25:%.*]] = load double, double* [[G]], align 8
-// CHECK3-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* bitcast (double* @g to i64*) monotonic, align 8
-// CHECK3-NEXT:    br label [[ATOMIC_CONT:%.*]]
-// CHECK3:       atomic_cont:
-// CHECK3-NEXT:    [[TMP26:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP33:%.*]], [[ATOMIC_CONT]] ]
-// CHECK3-NEXT:    [[TMP27:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64*
-// CHECK3-NEXT:    [[TMP28:%.*]] = bitcast i64 [[TMP26]] to double
-// CHECK3-NEXT:    store double [[TMP28]], double* [[_TMP7]], align 8
-// CHECK3-NEXT:    [[TMP29:%.*]] = load double, double* [[_TMP7]], align 8
-// CHECK3-NEXT:    [[TMP30:%.*]] = load double, double* [[G]], align 8
-// CHECK3-NEXT:    [[ADD8:%.*]] = fadd double [[TMP29]], [[TMP30]]
-// CHECK3-NEXT:    store double [[ADD8]], double* [[ATOMIC_TEMP]], align 8
-// CHECK3-NEXT:    [[TMP31:%.*]] = load i64, i64* [[TMP27]], align 8
-// CHECK3-NEXT:    [[TMP32:%.*]] = cmpxchg i64* bitcast (double* @g to i64*), i64 [[TMP26]], i64 [[TMP31]] monotonic monotonic, align 8
-// CHECK3-NEXT:    [[TMP33]] = extractvalue { i64, i1 } [[TMP32]], 0
-// CHECK3-NEXT:    [[TMP34:%.*]] = extractvalue { i64, i1 } [[TMP32]], 1
-// CHECK3-NEXT:    br i1 [[TMP34]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK3:       atomic_exit:
-// CHECK3-NEXT:    [[TMP35:%.*]] = load double, double* [[G1]], align 8
-// CHECK3-NEXT:    [[TMP36:%.*]] = bitcast double* [[TMP1]] to i64*
-// CHECK3-NEXT:    [[ATOMIC_LOAD9:%.*]] = load atomic i64, i64* [[TMP36]] monotonic, align 8
-// CHECK3-NEXT:    br label [[ATOMIC_CONT10:%.*]]
-// CHECK3:       atomic_cont10:
-// CHECK3-NEXT:    [[TMP37:%.*]] = phi i64 [ [[ATOMIC_LOAD9]], [[ATOMIC_EXIT]] ], [ [[TMP45:%.*]], [[ATOMIC_CONT10]] ]
-// CHECK3-NEXT:    [[TMP38:%.*]] = bitcast double* [[ATOMIC_TEMP11]] to i64*
-// CHECK3-NEXT:    [[TMP39:%.*]] = bitcast i64 [[TMP37]] to double
-// CHECK3-NEXT:    store double [[TMP39]], double* [[_TMP12]], align 8
-// CHECK3-NEXT:    [[TMP40:%.*]] = load double, double* [[_TMP12]], align 8
-// CHECK3-NEXT:    [[TMP41:%.*]] = load double, double* [[G1]], align 8
-// CHECK3-NEXT:    [[ADD13:%.*]] = fadd double [[TMP40]], [[TMP41]]
-// CHECK3-NEXT:    store double [[ADD13]], double* [[ATOMIC_TEMP11]], align 8
-// CHECK3-NEXT:    [[TMP42:%.*]] = load i64, i64* [[TMP38]], align 8
-// CHECK3-NEXT:    [[TMP43:%.*]] = bitcast double* [[TMP1]] to i64*
-// CHECK3-NEXT:    [[TMP44:%.*]] = cmpxchg i64* [[TMP43]], i64 [[TMP37]], i64 [[TMP42]] monotonic monotonic, align 8
-// CHECK3-NEXT:    [[TMP45]] = extractvalue { i64, i1 } [[TMP44]], 0
-// CHECK3-NEXT:    [[TMP46:%.*]] = extractvalue { i64, i1 } [[TMP44]], 1
-// CHECK3-NEXT:    br i1 [[TMP46]], label [[ATOMIC_EXIT14:%.*]], label [[ATOMIC_CONT10]]
-// CHECK3:       atomic_exit14:
+// CHECK3-NEXT:    [[TMP26:%.*]] = atomicrmw fadd double* @g, double [[TMP25]] monotonic, align 8
+// CHECK3-NEXT:    [[TMP27:%.*]] = load double, double* [[G1]], align 8
+// CHECK3-NEXT:    [[TMP28:%.*]] = atomicrmw fadd double* [[TMP1]], double [[TMP27]] monotonic, align 8
 // CHECK3-NEXT:    call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_.reduction.var)
 // CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK3:       .omp.reduction.default:
@@ -8633,10 +8555,6 @@ int main() {
 // CHECK4-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>, align 8
 // CHECK4-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8
-// CHECK4-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca double, align 8
-// CHECK4-NEXT:    [[_TMP8:%.*]] = alloca double, align 8
-// CHECK4-NEXT:    [[ATOMIC_TEMP12:%.*]] = alloca double, align 8
-// CHECK4-NEXT:    [[_TMP13:%.*]] = alloca double, align 8
 // CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK4-NEXT:    [[TMP0:%.*]] = load double*, double** @g1, align 8
@@ -8739,43 +8657,9 @@ int main() {
 // CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK4:       .omp.reduction.case2:
 // CHECK4-NEXT:    [[TMP29:%.*]] = load double, double* [[G]], align 8
-// CHECK4-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* bitcast (double* @g to i64*) monotonic, align 8
-// CHECK4-NEXT:    br label [[ATOMIC_CONT:%.*]]
-// CHECK4:       atomic_cont:
-// CHECK4-NEXT:    [[TMP30:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP37:%.*]], [[ATOMIC_CONT]] ]
-// CHECK4-NEXT:    [[TMP31:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64*
-// CHECK4-NEXT:    [[TMP32:%.*]] = bitcast i64 [[TMP30]] to double
-// CHECK4-NEXT:    store double [[TMP32]], double* [[_TMP8]], align 8
-// CHECK4-NEXT:    [[TMP33:%.*]] = load double, double* [[_TMP8]], align 8
-// CHECK4-NEXT:    [[TMP34:%.*]] = load double, double* [[G]], align 8
-// CHECK4-NEXT:    [[ADD9:%.*]] = fadd double [[TMP33]], [[TMP34]]
-// CHECK4-NEXT:    store double [[ADD9]], double* [[ATOMIC_TEMP]], align 8
-// CHECK4-NEXT:    [[TMP35:%.*]] = load i64, i64* [[TMP31]], align 8
-// CHECK4-NEXT:    [[TMP36:%.*]] = cmpxchg i64* bitcast (double* @g to i64*), i64 [[TMP30]], i64 [[TMP35]] monotonic monotonic, align 8
-// CHECK4-NEXT:    [[TMP37]] = extractvalue { i64, i1 } [[TMP36]], 0
-// CHECK4-NEXT:    [[TMP38:%.*]] = extractvalue { i64, i1 } [[TMP36]], 1
-// CHECK4-NEXT:    br i1 [[TMP38]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK4:       atomic_exit:
-// CHECK4-NEXT:    [[TMP39:%.*]] = load double, double* [[G1]], align 8
-// CHECK4-NEXT:    [[TMP40:%.*]] = bitcast double* [[TMP1]] to i64*
-// CHECK4-NEXT:    [[ATOMIC_LOAD10:%.*]] = load atomic i64, i64* [[TMP40]] monotonic, align 8
-// CHECK4-NEXT:    br label [[ATOMIC_CONT11:%.*]]
-// CHECK4:       atomic_cont11:
-// CHECK4-NEXT:    [[TMP41:%.*]] = phi i64 [ [[ATOMIC_LOAD10]], [[ATOMIC_EXIT]] ], [ [[TMP49:%.*]], [[ATOMIC_CONT11]] ]
-// CHECK4-NEXT:    [[TMP42:%.*]] = bitcast double* [[ATOMIC_TEMP12]] to i64*
-// CHECK4-NEXT:    [[TMP43:%.*]] = bitcast i64 [[TMP41]] to double
-// CHECK4-NEXT:    store double [[TMP43]], double* [[_TMP13]], align 8
-// CHECK4-NEXT:    [[TMP44:%.*]] = load double, double* [[_TMP13]], align 8
-// CHECK4-NEXT:    [[TMP45:%.*]] = load double, double* [[G1]], align 8
-// CHECK4-NEXT:    [[ADD14:%.*]] = fadd double [[TMP44]], [[TMP45]]
-// CHECK4-NEXT:    store double [[ADD14]], double* [[ATOMIC_TEMP12]], align 8
-// CHECK4-NEXT:    [[TMP46:%.*]] = load i64, i64* [[TMP42]], align 8
-// CHECK4-NEXT:    [[TMP47:%.*]] = bitcast double* [[TMP1]] to i64*
-// CHECK4-NEXT:    [[TMP48:%.*]] = cmpxchg i64* [[TMP47]], i64 [[TMP41]], i64 [[TMP46]] monotonic monotonic, align 8
-// CHECK4-NEXT:    [[TMP49]] = extractvalue { i64, i1 } [[TMP48]], 0
-// CHECK4-NEXT:    [[TMP50:%.*]] = extractvalue { i64, i1 } [[TMP48]], 1
-// CHECK4-NEXT:    br i1 [[TMP50]], label [[ATOMIC_EXIT15:%.*]], label [[ATOMIC_CONT11]]
-// CHECK4:       atomic_exit15:
+// CHECK4-NEXT:    [[TMP30:%.*]] = atomicrmw fadd double* @g, double [[TMP29]] monotonic, align 8
+// CHECK4-NEXT:    [[TMP31:%.*]] = load double, double* [[G1]], align 8
+// CHECK4-NEXT:    [[TMP32:%.*]] = atomicrmw fadd double* [[TMP1]], double [[TMP31]] monotonic, align 8
 // CHECK4-NEXT:    call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_.reduction.var)
 // CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK4:       .omp.reduction.default:
diff --git a/clang/test/OpenMP/parallel_reduction_codegen.cpp b/clang/test/OpenMP/parallel_reduction_codegen.cpp
index 3432947..f40e908 100644
--- a/clang/test/OpenMP/parallel_reduction_codegen.cpp
+++ b/clang/test/OpenMP/parallel_reduction_codegen.cpp
@@ -605,11 +605,9 @@ int main() {
 // CHECK1-NEXT:    [[T_VAR15:%.*]] = alloca float, align 4
 // CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8
 // CHECK1-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK1-NEXT:    [[REF_TMP12:%.*]] = alloca [[STRUCT_S]], align 4
 // CHECK1-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca float, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca float, align 4
-// CHECK1-NEXT:    [[REF_TMP13:%.*]] = alloca [[STRUCT_S]], align 4
-// CHECK1-NEXT:    [[ATOMIC_TEMP23:%.*]] = alloca float, align 4
-// CHECK1-NEXT:    [[_TMP24:%.*]] = alloca float, align 4
 // CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT:    store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8
@@ -697,77 +695,59 @@ int main() {
 // CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK1:       .omp.reduction.case2:
 // CHECK1-NEXT:    [[TMP32:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK1-NEXT:    [[TMP33:%.*]] = bitcast float* [[TMP1]] to i32*
-// CHECK1-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP33]] monotonic, align 4
-// CHECK1-NEXT:    br label [[ATOMIC_CONT:%.*]]
-// CHECK1:       atomic_cont:
-// CHECK1-NEXT:    [[TMP34:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP42:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT:    [[TMP35:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
-// CHECK1-NEXT:    [[TMP36:%.*]] = bitcast i32 [[TMP34]] to float
-// CHECK1-NEXT:    store float [[TMP36]], float* [[TMP]], align 4
-// CHECK1-NEXT:    [[TMP37:%.*]] = load float, float* [[TMP]], align 4
-// CHECK1-NEXT:    [[TMP38:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK1-NEXT:    [[ADD11:%.*]] = fadd float [[TMP37]], [[TMP38]]
-// CHECK1-NEXT:    store float [[ADD11]], float* [[ATOMIC_TEMP]], align 4
-// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, i32* [[TMP35]], align 4
-// CHECK1-NEXT:    [[TMP40:%.*]] = bitcast float* [[TMP1]] to i32*
-// CHECK1-NEXT:    [[TMP41:%.*]] = cmpxchg i32* [[TMP40]], i32 [[TMP34]], i32 [[TMP39]] monotonic monotonic, align 4
-// CHECK1-NEXT:    [[TMP42]] = extractvalue { i32, i1 } [[TMP41]], 0
-// CHECK1-NEXT:    [[TMP43:%.*]] = extractvalue { i32, i1 } [[TMP41]], 1
-// CHECK1-NEXT:    br i1 [[TMP43]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK1:       atomic_exit:
+// CHECK1-NEXT:    [[TMP33:%.*]] = atomicrmw fadd float* [[TMP1]], float [[TMP32]] monotonic, align 4
 // CHECK1-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT:    [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
-// CHECK1-NEXT:    [[TMP44:%.*]] = bitcast %struct.S* [[TMP3]] to i8*
-// CHECK1-NEXT:    [[TMP45:%.*]] = bitcast %struct.S* [[CALL12]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false)
+// CHECK1-NEXT:    [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
+// CHECK1-NEXT:    [[TMP34:%.*]] = bitcast %struct.S* [[TMP3]] to i8*
+// CHECK1-NEXT:    [[TMP35:%.*]] = bitcast %struct.S* [[CALL11]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false)
 // CHECK1-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
 // CHECK1-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT:    [[CALL14:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]])
-// CHECK1-NEXT:    [[TOBOOL15:%.*]] = fcmp une float [[CALL14]], 0.000000e+00
-// CHECK1-NEXT:    br i1 [[TOBOOL15]], label [[LAND_RHS16:%.*]], label [[LAND_END19:%.*]]
-// CHECK1:       land.rhs16:
-// CHECK1-NEXT:    [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
-// CHECK1-NEXT:    [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00
-// CHECK1-NEXT:    br label [[LAND_END19]]
-// CHECK1:       land.end19:
-// CHECK1-NEXT:    [[TMP46:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL18]], [[LAND_RHS16]] ]
-// CHECK1-NEXT:    [[CONV20:%.*]] = uitofp i1 [[TMP46]] to float
-// CHECK1-NEXT:    call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]], float noundef [[CONV20]])
-// CHECK1-NEXT:    [[TMP47:%.*]] = bitcast %struct.S* [[TMP4]] to i8*
-// CHECK1-NEXT:    [[TMP48:%.*]] = bitcast %struct.S* [[REF_TMP13]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false)
-// CHECK1-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[CALL13:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]])
+// CHECK1-NEXT:    [[TOBOOL14:%.*]] = fcmp une float [[CALL13]], 0.000000e+00
+// CHECK1-NEXT:    br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END18:%.*]]
+// CHECK1:       land.rhs15:
+// CHECK1-NEXT:    [[CALL16:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
+// CHECK1-NEXT:    [[TOBOOL17:%.*]] = fcmp une float [[CALL16]], 0.000000e+00
+// CHECK1-NEXT:    br label [[LAND_END18]]
+// CHECK1:       land.end18:
+// CHECK1-NEXT:    [[TMP36:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL17]], [[LAND_RHS15]] ]
+// CHECK1-NEXT:    [[CONV19:%.*]] = uitofp i1 [[TMP36]] to float
+// CHECK1-NEXT:    call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]], float noundef [[CONV19]])
+// CHECK1-NEXT:    [[TMP37:%.*]] = bitcast %struct.S* [[TMP4]] to i8*
+// CHECK1-NEXT:    [[TMP38:%.*]] = bitcast %struct.S* [[REF_TMP12]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false)
+// CHECK1-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]]) #[[ATTR5]]
 // CHECK1-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT:    [[TMP49:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK1-NEXT:    [[TMP50:%.*]] = bitcast float* [[TMP5]] to i32*
-// CHECK1-NEXT:    [[ATOMIC_LOAD21:%.*]] = load atomic i32, i32* [[TMP50]] monotonic, align 4
-// CHECK1-NEXT:    br label [[ATOMIC_CONT22:%.*]]
-// CHECK1:       atomic_cont22:
-// CHECK1-NEXT:    [[TMP51:%.*]] = phi i32 [ [[ATOMIC_LOAD21]], [[LAND_END19]] ], [ [[TMP61:%.*]], [[COND_END28:%.*]] ]
-// CHECK1-NEXT:    [[TMP52:%.*]] = bitcast float* [[ATOMIC_TEMP23]] to i32*
-// CHECK1-NEXT:    [[TMP53:%.*]] = bitcast i32 [[TMP51]] to float
-// CHECK1-NEXT:    store float [[TMP53]], float* [[_TMP24]], align 4
-// CHECK1-NEXT:    [[TMP54:%.*]] = load float, float* [[_TMP24]], align 4
-// CHECK1-NEXT:    [[TMP55:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK1-NEXT:    [[CMP25:%.*]] = fcmp olt float [[TMP54]], [[TMP55]]
-// CHECK1-NEXT:    br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]]
-// CHECK1:       cond.true26:
-// CHECK1-NEXT:    [[TMP56:%.*]] = load float, float* [[_TMP24]], align 4
-// CHECK1-NEXT:    br label [[COND_END28]]
-// CHECK1:       cond.false27:
-// CHECK1-NEXT:    [[TMP57:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK1-NEXT:    br label [[COND_END28]]
-// CHECK1:       cond.end28:
-// CHECK1-NEXT:    [[COND29:%.*]] = phi float [ [[TMP56]], [[COND_TRUE26]] ], [ [[TMP57]], [[COND_FALSE27]] ]
-// CHECK1-NEXT:    store float [[COND29]], float* [[ATOMIC_TEMP23]], align 4
-// CHECK1-NEXT:    [[TMP58:%.*]] = load i32, i32* [[TMP52]], align 4
-// CHECK1-NEXT:    [[TMP59:%.*]] = bitcast float* [[TMP5]] to i32*
-// CHECK1-NEXT:    [[TMP60:%.*]] = cmpxchg i32* [[TMP59]], i32 [[TMP51]], i32 [[TMP58]] monotonic monotonic, align 4
-// CHECK1-NEXT:    [[TMP61]] = extractvalue { i32, i1 } [[TMP60]], 0
-// CHECK1-NEXT:    [[TMP62:%.*]] = extractvalue { i32, i1 } [[TMP60]], 1
-// CHECK1-NEXT:    br i1 [[TMP62]], label [[ATOMIC_EXIT30:%.*]], label [[ATOMIC_CONT22]]
-// CHECK1:       atomic_exit30:
+// CHECK1-NEXT:    [[TMP39:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK1-NEXT:    [[TMP40:%.*]] = bitcast float* [[TMP5]] to i32*
+// CHECK1-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP40]] monotonic, align 4
+// CHECK1-NEXT:    br label [[ATOMIC_CONT:%.*]]
+// CHECK1:       atomic_cont:
+// CHECK1-NEXT:    [[TMP41:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END18]] ], [ [[TMP51:%.*]], [[COND_END23:%.*]] ]
+// CHECK1-NEXT:    [[TMP42:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
+// CHECK1-NEXT:    [[TMP43:%.*]] = bitcast i32 [[TMP41]] to float
+// CHECK1-NEXT:    store float [[TMP43]], float* [[TMP]], align 4
+// CHECK1-NEXT:    [[TMP44:%.*]] = load float, float* [[TMP]], align 4
+// CHECK1-NEXT:    [[TMP45:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK1-NEXT:    [[CMP20:%.*]] = fcmp olt float [[TMP44]], [[TMP45]]
+// CHECK1-NEXT:    br i1 [[CMP20]], label [[COND_TRUE21:%.*]], label [[COND_FALSE22:%.*]]
+// CHECK1:       cond.true21:
+// CHECK1-NEXT:    [[TMP46:%.*]] = load float, float* [[TMP]], align 4
+// CHECK1-NEXT:    br label [[COND_END23]]
+// CHECK1:       cond.false22:
+// CHECK1-NEXT:    [[TMP47:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK1-NEXT:    br label [[COND_END23]]
+// CHECK1:       cond.end23:
+// CHECK1-NEXT:    [[COND24:%.*]] = phi float [ [[TMP46]], [[COND_TRUE21]] ], [ [[TMP47]], [[COND_FALSE22]] ]
+// CHECK1-NEXT:    store float [[COND24]], float* [[ATOMIC_TEMP]], align 4
+// CHECK1-NEXT:    [[TMP48:%.*]] = load i32, i32* [[TMP42]], align 4
+// CHECK1-NEXT:    [[TMP49:%.*]] = bitcast float* [[TMP5]] to i32*
+// CHECK1-NEXT:    [[TMP50:%.*]] = cmpxchg i32* [[TMP49]], i32 [[TMP41]], i32 [[TMP48]] monotonic monotonic, align 4
+// CHECK1-NEXT:    [[TMP51]] = extractvalue { i32, i1 } [[TMP50]], 0
+// CHECK1-NEXT:    [[TMP52:%.*]] = extractvalue { i32, i1 } [[TMP50]], 1
+// CHECK1-NEXT:    br i1 [[TMP52]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
+// CHECK1:       atomic_exit:
 // CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK1:       .omp.reduction.default:
 // CHECK1-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]]
@@ -2005,11 +1985,9 @@ int main() {
 // CHECK2-NEXT:    [[T_VAR15:%.*]] = alloca float, align 4
 // CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8
 // CHECK2-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK2-NEXT:    [[REF_TMP12:%.*]] = alloca [[STRUCT_S]], align 4
 // CHECK2-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca float, align 4
 // CHECK2-NEXT:    [[TMP:%.*]] = alloca float, align 4
-// CHECK2-NEXT:    [[REF_TMP13:%.*]] = alloca [[STRUCT_S]], align 4
-// CHECK2-NEXT:    [[ATOMIC_TEMP23:%.*]] = alloca float, align 4
-// CHECK2-NEXT:    [[_TMP24:%.*]] = alloca float, align 4
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK2-NEXT:    store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8
@@ -2097,77 +2075,59 @@ int main() {
 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK2:       .omp.reduction.case2:
 // CHECK2-NEXT:    [[TMP32:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK2-NEXT:    [[TMP33:%.*]] = bitcast float* [[TMP1]] to i32*
-// CHECK2-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP33]] monotonic, align 4
-// CHECK2-NEXT:    br label [[ATOMIC_CONT:%.*]]
-// CHECK2:       atomic_cont:
-// CHECK2-NEXT:    [[TMP34:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP42:%.*]], [[ATOMIC_CONT]] ]
-// CHECK2-NEXT:    [[TMP35:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
-// CHECK2-NEXT:    [[TMP36:%.*]] = bitcast i32 [[TMP34]] to float
-// CHECK2-NEXT:    store float [[TMP36]], float* [[TMP]], align 4
-// CHECK2-NEXT:    [[TMP37:%.*]] = load float, float* [[TMP]], align 4
-// CHECK2-NEXT:    [[TMP38:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK2-NEXT:    [[ADD11:%.*]] = fadd float [[TMP37]], [[TMP38]]
-// CHECK2-NEXT:    store float [[ADD11]], float* [[ATOMIC_TEMP]], align 4
-// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, i32* [[TMP35]], align 4
-// CHECK2-NEXT:    [[TMP40:%.*]] = bitcast float* [[TMP1]] to i32*
-// CHECK2-NEXT:    [[TMP41:%.*]] = cmpxchg i32* [[TMP40]], i32 [[TMP34]], i32 [[TMP39]] monotonic monotonic, align 4
-// CHECK2-NEXT:    [[TMP42]] = extractvalue { i32, i1 } [[TMP41]], 0
-// CHECK2-NEXT:    [[TMP43:%.*]] = extractvalue { i32, i1 } [[TMP41]], 1
-// CHECK2-NEXT:    br i1 [[TMP43]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK2:       atomic_exit:
+// CHECK2-NEXT:    [[TMP33:%.*]] = atomicrmw fadd float* [[TMP1]], float [[TMP32]] monotonic, align 4
 // CHECK2-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT:    [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
-// CHECK2-NEXT:    [[TMP44:%.*]] = bitcast %struct.S* [[TMP3]] to i8*
-// CHECK2-NEXT:    [[TMP45:%.*]] = bitcast %struct.S* [[CALL12]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false)
+// CHECK2-NEXT:    [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
+// CHECK2-NEXT:    [[TMP34:%.*]] = bitcast %struct.S* [[TMP3]] to i8*
+// CHECK2-NEXT:    [[TMP35:%.*]] = bitcast %struct.S* [[CALL11]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false)
 // CHECK2-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
 // CHECK2-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT:    [[CALL14:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]])
-// CHECK2-NEXT:    [[TOBOOL15:%.*]] = fcmp une float [[CALL14]], 0.000000e+00
-// CHECK2-NEXT:    br i1 [[TOBOOL15]], label [[LAND_RHS16:%.*]], label [[LAND_END19:%.*]]
-// CHECK2:       land.rhs16:
-// CHECK2-NEXT:    [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
-// CHECK2-NEXT:    [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00
-// CHECK2-NEXT:    br label [[LAND_END19]]
-// CHECK2:       land.end19:
-// CHECK2-NEXT:    [[TMP46:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL18]], [[LAND_RHS16]] ]
-// CHECK2-NEXT:    [[CONV20:%.*]] = uitofp i1 [[TMP46]] to float
-// CHECK2-NEXT:    call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]], float noundef [[CONV20]])
-// CHECK2-NEXT:    [[TMP47:%.*]] = bitcast %struct.S* [[TMP4]] to i8*
-// CHECK2-NEXT:    [[TMP48:%.*]] = bitcast %struct.S* [[REF_TMP13]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false)
-// CHECK2-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[CALL13:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]])
+// CHECK2-NEXT:    [[TOBOOL14:%.*]] = fcmp une float [[CALL13]], 0.000000e+00
+// CHECK2-NEXT:    br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END18:%.*]]
+// CHECK2:       land.rhs15:
+// CHECK2-NEXT:    [[CALL16:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
+// CHECK2-NEXT:    [[TOBOOL17:%.*]] = fcmp une float [[CALL16]], 0.000000e+00
+// CHECK2-NEXT:    br label [[LAND_END18]]
+// CHECK2:       land.end18:
+// CHECK2-NEXT:    [[TMP36:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL17]], [[LAND_RHS15]] ]
+// CHECK2-NEXT:    [[CONV19:%.*]] = uitofp i1 [[TMP36]] to float
+// CHECK2-NEXT:    call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]], float noundef [[CONV19]])
+// CHECK2-NEXT:    [[TMP37:%.*]] = bitcast %struct.S* [[TMP4]] to i8*
+// CHECK2-NEXT:    [[TMP38:%.*]] = bitcast %struct.S* [[REF_TMP12]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false)
+// CHECK2-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]]) #[[ATTR5]]
 // CHECK2-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT:    [[TMP49:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK2-NEXT:    [[TMP50:%.*]] = bitcast float* [[TMP5]] to i32*
-// CHECK2-NEXT:    [[ATOMIC_LOAD21:%.*]] = load atomic i32, i32* [[TMP50]] monotonic, align 4
-// CHECK2-NEXT:    br label [[ATOMIC_CONT22:%.*]]
-// CHECK2:       atomic_cont22:
-// CHECK2-NEXT:    [[TMP51:%.*]] = phi i32 [ [[ATOMIC_LOAD21]], [[LAND_END19]] ], [ [[TMP61:%.*]], [[COND_END28:%.*]] ]
-// CHECK2-NEXT:    [[TMP52:%.*]] = bitcast float* [[ATOMIC_TEMP23]] to i32*
-// CHECK2-NEXT:    [[TMP53:%.*]] = bitcast i32 [[TMP51]] to float
-// CHECK2-NEXT:    store float [[TMP53]], float* [[_TMP24]], align 4
-// CHECK2-NEXT:    [[TMP54:%.*]] = load float, float* [[_TMP24]], align 4
-// CHECK2-NEXT:    [[TMP55:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK2-NEXT:    [[CMP25:%.*]] = fcmp olt float [[TMP54]], [[TMP55]]
-// CHECK2-NEXT:    br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]]
-// CHECK2:       cond.true26:
-// CHECK2-NEXT:    [[TMP56:%.*]] = load float, float* [[_TMP24]], align 4
-// CHECK2-NEXT:    br label [[COND_END28]]
-// CHECK2:       cond.false27:
-// CHECK2-NEXT:    [[TMP57:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK2-NEXT:    br label [[COND_END28]]
-// CHECK2:       cond.end28:
-// CHECK2-NEXT:    [[COND29:%.*]] = phi float [ [[TMP56]], [[COND_TRUE26]] ], [ [[TMP57]], [[COND_FALSE27]] ]
-// CHECK2-NEXT:    store float [[COND29]], float* [[ATOMIC_TEMP23]], align 4
-// CHECK2-NEXT:    [[TMP58:%.*]] = load i32, i32* [[TMP52]], align 4
-// CHECK2-NEXT:    [[TMP59:%.*]] = bitcast float* [[TMP5]] to i32*
-// CHECK2-NEXT:    [[TMP60:%.*]] = cmpxchg i32* [[TMP59]], i32 [[TMP51]], i32 [[TMP58]] monotonic monotonic, align 4
-// CHECK2-NEXT:    [[TMP61]] = extractvalue { i32, i1 } [[TMP60]], 0
-// CHECK2-NEXT:    [[TMP62:%.*]] = extractvalue { i32, i1 } [[TMP60]], 1
-// CHECK2-NEXT:    br i1 [[TMP62]], label [[ATOMIC_EXIT30:%.*]], label [[ATOMIC_CONT22]]
-// CHECK2:       atomic_exit30:
+// CHECK2-NEXT:    [[TMP39:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK2-NEXT:    [[TMP40:%.*]] = bitcast float* [[TMP5]] to i32*
+// CHECK2-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP40]] monotonic, align 4
+// CHECK2-NEXT:    br label [[ATOMIC_CONT:%.*]]
+// CHECK2:       atomic_cont:
+// CHECK2-NEXT:    [[TMP41:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END18]] ], [ [[TMP51:%.*]], [[COND_END23:%.*]] ]
+// CHECK2-NEXT:    [[TMP42:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
+// CHECK2-NEXT:    [[TMP43:%.*]] = bitcast i32 [[TMP41]] to float
+// CHECK2-NEXT:    store float [[TMP43]], float* [[TMP]], align 4
+// CHECK2-NEXT:    [[TMP44:%.*]] = load float, float* [[TMP]], align 4
+// CHECK2-NEXT:    [[TMP45:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK2-NEXT:    [[CMP20:%.*]] = fcmp olt float [[TMP44]], [[TMP45]]
+// CHECK2-NEXT:    br i1 [[CMP20]], label [[COND_TRUE21:%.*]], label [[COND_FALSE22:%.*]]
+// CHECK2:       cond.true21:
+// CHECK2-NEXT:    [[TMP46:%.*]] = load float, float* [[TMP]], align 4
+// CHECK2-NEXT:    br label [[COND_END23]]
+// CHECK2:       cond.false22:
+// CHECK2-NEXT:    [[TMP47:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK2-NEXT:    br label [[COND_END23]]
+// CHECK2:       cond.end23:
+// CHECK2-NEXT:    [[COND24:%.*]] = phi float [ [[TMP46]], [[COND_TRUE21]] ], [ [[TMP47]], [[COND_FALSE22]] ]
+// CHECK2-NEXT:    store float [[COND24]], float* [[ATOMIC_TEMP]], align 4
+// CHECK2-NEXT:    [[TMP48:%.*]] = load i32, i32* [[TMP42]], align 4
+// CHECK2-NEXT:    [[TMP49:%.*]] = bitcast float* [[TMP5]] to i32*
+// CHECK2-NEXT:    [[TMP50:%.*]] = cmpxchg i32* [[TMP49]], i32 [[TMP41]], i32 [[TMP48]] monotonic monotonic, align 4
+// CHECK2-NEXT:    [[TMP51]] = extractvalue { i32, i1 } [[TMP50]], 0
+// CHECK2-NEXT:    [[TMP52:%.*]] = extractvalue { i32, i1 } [[TMP50]], 1
+// CHECK2-NEXT:    br i1 [[TMP52]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
+// CHECK2:       atomic_exit:
 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK2:       .omp.reduction.default:
 // CHECK2-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]]
diff --git a/clang/test/OpenMP/predefined_macro.c b/clang/test/OpenMP/predefined_macro.c
index c9722d5..330dbe3 100644
--- a/clang/test/OpenMP/predefined_macro.c
+++ b/clang/test/OpenMP/predefined_macro.c
@@ -9,7 +9,7 @@
 // -fopenmp option is specified
 #ifndef _OPENMP
 #error "No _OPENMP macro is defined with -fopenmp option"
-#elsif _OPENMP != 201107
+#elif _OPENMP != 201811
 #error "_OPENMP has incorrect value"
 #endif //_OPENMP
 #else
diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp
index cae2ef3..8fb6675 100644
--- a/clang/test/OpenMP/reduction_implicit_map.cpp
+++ b/clang/test/OpenMP/reduction_implicit_map.cpp
@@ -502,8 +502,6 @@ int main()
 // CHECK1-NEXT:    [[DOTTASK_RED_:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    [[I:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8
-// CHECK1-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca double, align 8
-// CHECK1-NEXT:    [[_TMP30:%.*]] = alloca double, align 8
 // CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT:    store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8
@@ -668,39 +666,21 @@ int main()
 // CHECK1:       .omp.reduction.case2:
 // CHECK1-NEXT:    [[TMP59:%.*]] = getelementptr double, double* [[ARRAYIDX3]], i64 [[TMP5]]
 // CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq double* [[ARRAYIDX3]], [[TMP59]]
-// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE33:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]]
 // CHECK1:       omp.arraycpy.body27:
-// CHECK1-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi double* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ]
-// CHECK1-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi double* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ]
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi double* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY27]] ]
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi double* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY27]] ]
 // CHECK1-NEXT:    [[TMP60:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8
-// CHECK1-NEXT:    [[TMP61:%.*]] = bitcast double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]] to i64*
-// CHECK1-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* [[TMP61]] monotonic, align 8
-// CHECK1-NEXT:    br label [[ATOMIC_CONT:%.*]]
-// CHECK1:       atomic_cont:
-// CHECK1-NEXT:    [[TMP62:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY27]] ], [ [[TMP70:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT:    [[TMP63:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64*
-// CHECK1-NEXT:    [[TMP64:%.*]] = bitcast i64 [[TMP62]] to double
-// CHECK1-NEXT:    store double [[TMP64]], double* [[_TMP30]], align 8
-// CHECK1-NEXT:    [[TMP65:%.*]] = load double, double* [[_TMP30]], align 8
-// CHECK1-NEXT:    [[TMP66:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8
-// CHECK1-NEXT:    [[ADD31:%.*]] = fadd double [[TMP65]], [[TMP66]]
-// CHECK1-NEXT:    store double [[ADD31]], double* [[ATOMIC_TEMP]], align 8
-// CHECK1-NEXT:    [[TMP67:%.*]] = load i64, i64* [[TMP63]], align 8
-// CHECK1-NEXT:    [[TMP68:%.*]] = bitcast double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]] to i64*
-// CHECK1-NEXT:    [[TMP69:%.*]] = cmpxchg i64* [[TMP68]], i64 [[TMP62]], i64 [[TMP67]] monotonic monotonic, align 8
-// CHECK1-NEXT:    [[TMP70]] = extractvalue { i64, i1 } [[TMP69]], 0
-// CHECK1-NEXT:    [[TMP71:%.*]] = extractvalue { i64, i1 } [[TMP69]], 1
-// CHECK1-NEXT:    br i1 [[TMP71]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
-// CHECK1:       atomic_exit:
-// CHECK1-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1
-// CHECK1-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1
-// CHECK1-NEXT:    [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP59]]
-// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY27]]
-// CHECK1:       omp.arraycpy.done35:
+// CHECK1-NEXT:    [[TMP61:%.*]] = atomicrmw fadd double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], double [[TMP60]] monotonic, align 8
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT30]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT31]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP59]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_BODY27]]
+// CHECK1:       omp.arraycpy.done33:
 // CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK1:       .omp.reduction.default:
-// CHECK1-NEXT:    [[TMP72:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP72]])
+// CHECK1-NEXT:    [[TMP62:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP62]])
 // CHECK1-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/sections_reduction_codegen.cpp b/clang/test/OpenMP/sections_reduction_codegen.cpp
index f2e636e..7759b09 100644
--- a/clang/test/OpenMP/sections_reduction_codegen.cpp
+++ b/clang/test/OpenMP/sections_reduction_codegen.cpp
@@ -268,11 +268,9 @@ int main() {
 // CHECK1-NEXT:    [[T_VAR15:%.*]] = alloca float, align 4
 // CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8
 // CHECK1-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK1-NEXT:    [[REF_TMP16:%.*]] = alloca [[STRUCT_S]], align 4
 // CHECK1-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca float, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca float, align 4
-// CHECK1-NEXT:    [[REF_TMP17:%.*]] = alloca [[STRUCT_S]], align 4
-// CHECK1-NEXT:    [[ATOMIC_TEMP27:%.*]] = alloca float, align 4
-// CHECK1-NEXT:    [[_TMP28:%.*]] = alloca float, align 4
 // CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT:    store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8
@@ -401,77 +399,59 @@ int main() {
 // CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK1:       .omp.reduction.case2:
 // CHECK1-NEXT:    [[TMP43:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK1-NEXT:    [[TMP44:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK1-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP44]] monotonic, align 4
-// CHECK1-NEXT:    br label [[ATOMIC_CONT:%.*]]
-// CHECK1:       atomic_cont:
-// CHECK1-NEXT:    [[TMP45:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP53:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT:    [[TMP46:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
-// CHECK1-NEXT:    [[TMP47:%.*]] = bitcast i32 [[TMP45]] to float
-// CHECK1-NEXT:    store float [[TMP47]], float* [[TMP]], align 4
-// CHECK1-NEXT:    [[TMP48:%.*]] = load float, float* [[TMP]], align 4
-// CHECK1-NEXT:    [[TMP49:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK1-NEXT:    [[ADD15:%.*]] = fadd float [[TMP48]], [[TMP49]]
-// CHECK1-NEXT:    store float [[ADD15]], float* [[ATOMIC_TEMP]], align 4
-// CHECK1-NEXT:    [[TMP50:%.*]] = load i32, i32* [[TMP46]], align 4
-// CHECK1-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK1-NEXT:    [[TMP52:%.*]] = cmpxchg i32* [[TMP51]], i32 [[TMP45]], i32 [[TMP50]] monotonic monotonic, align 4
-// CHECK1-NEXT:    [[TMP53]] = extractvalue { i32, i1 } [[TMP52]], 0
-// CHECK1-NEXT:    [[TMP54:%.*]] = extractvalue { i32, i1 } [[TMP52]], 1
-// CHECK1-NEXT:    br i1 [[TMP54]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK1:       atomic_exit:
+// CHECK1-NEXT:    [[TMP44:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP43]] monotonic, align 4
 // CHECK1-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT:    [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
-// CHECK1-NEXT:    [[TMP55:%.*]] = bitcast %struct.S* [[TMP1]] to i8*
-// CHECK1-NEXT:    [[TMP56:%.*]] = bitcast %struct.S* [[CALL16]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP55]], i8* align 4 [[TMP56]], i64 4, i1 false)
+// CHECK1-NEXT:    [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
+// CHECK1-NEXT:    [[TMP45:%.*]] = bitcast %struct.S* [[TMP1]] to i8*
+// CHECK1-NEXT:    [[TMP46:%.*]] = bitcast %struct.S* [[CALL15]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false)
 // CHECK1-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
 // CHECK1-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT:    [[CALL18:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
-// CHECK1-NEXT:    [[TOBOOL19:%.*]] = fcmp une float [[CALL18]], 0.000000e+00
-// CHECK1-NEXT:    br i1 [[TOBOOL19]], label [[LAND_RHS20:%.*]], label [[LAND_END23:%.*]]
-// CHECK1:       land.rhs20:
-// CHECK1-NEXT:    [[CALL21:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
-// CHECK1-NEXT:    [[TOBOOL22:%.*]] = fcmp une float [[CALL21]], 0.000000e+00
-// CHECK1-NEXT:    br label [[LAND_END23]]
-// CHECK1:       land.end23:
-// CHECK1-NEXT:    [[TMP57:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL22]], [[LAND_RHS20]] ]
-// CHECK1-NEXT:    [[CONV24:%.*]] = uitofp i1 [[TMP57]] to float
-// CHECK1-NEXT:    call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP17]], float noundef [[CONV24]])
-// CHECK1-NEXT:    [[TMP58:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
-// CHECK1-NEXT:    [[TMP59:%.*]] = bitcast %struct.S* [[REF_TMP17]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP58]], i8* align 4 [[TMP59]], i64 4, i1 false)
-// CHECK1-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP17]]) #[[ATTR4]]
+// CHECK1-NEXT:    [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
+// CHECK1-NEXT:    [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00
+// CHECK1-NEXT:    br i1 [[TOBOOL18]], label [[LAND_RHS19:%.*]], label [[LAND_END22:%.*]]
+// CHECK1:       land.rhs19:
+// CHECK1-NEXT:    [[CALL20:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
+// CHECK1-NEXT:    [[TOBOOL21:%.*]] = fcmp une float [[CALL20]], 0.000000e+00
+// CHECK1-NEXT:    br label [[LAND_END22]]
+// CHECK1:       land.end22:
+// CHECK1-NEXT:    [[TMP47:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL21]], [[LAND_RHS19]] ]
+// CHECK1-NEXT:    [[CONV23:%.*]] = uitofp i1 [[TMP47]] to float
+// CHECK1-NEXT:    call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]], float noundef [[CONV23]])
+// CHECK1-NEXT:    [[TMP48:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
+// CHECK1-NEXT:    [[TMP49:%.*]] = bitcast %struct.S* [[REF_TMP16]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP48]], i8* align 4 [[TMP49]], i64 4, i1 false)
+// CHECK1-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR4]]
 // CHECK1-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT:    [[TMP60:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK1-NEXT:    [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK1-NEXT:    [[ATOMIC_LOAD25:%.*]] = load atomic i32, i32* [[TMP61]] monotonic, align 4
-// CHECK1-NEXT:    br label [[ATOMIC_CONT26:%.*]]
-// CHECK1:       atomic_cont26:
-// CHECK1-NEXT:    [[TMP62:%.*]] = phi i32 [ [[ATOMIC_LOAD25]], [[LAND_END23]] ], [ [[TMP72:%.*]], [[COND_END32:%.*]] ]
-// CHECK1-NEXT:    [[TMP63:%.*]] = bitcast float* [[ATOMIC_TEMP27]] to i32*
-// CHECK1-NEXT:    [[TMP64:%.*]] = bitcast i32 [[TMP62]] to float
-// CHECK1-NEXT:    store float [[TMP64]], float* [[_TMP28]], align 4
-// CHECK1-NEXT:    [[TMP65:%.*]] = load float, float* [[_TMP28]], align 4
-// CHECK1-NEXT:    [[TMP66:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK1-NEXT:    [[CMP29:%.*]] = fcmp olt float [[TMP65]], [[TMP66]]
-// CHECK1-NEXT:    br i1 [[CMP29]], label [[COND_TRUE30:%.*]], label [[COND_FALSE31:%.*]]
-// CHECK1:       cond.true30:
-// CHECK1-NEXT:    [[TMP67:%.*]] = load float, float* [[_TMP28]], align 4
-// CHECK1-NEXT:    br label [[COND_END32]]
-// CHECK1:       cond.false31:
-// CHECK1-NEXT:    [[TMP68:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK1-NEXT:    br label [[COND_END32]]
-// CHECK1:       cond.end32:
-// CHECK1-NEXT:    [[COND33:%.*]] = phi float [ [[TMP67]], [[COND_TRUE30]] ], [ [[TMP68]], [[COND_FALSE31]] ]
-// CHECK1-NEXT:    store float [[COND33]], float* [[ATOMIC_TEMP27]], align 4
-// CHECK1-NEXT:    [[TMP69:%.*]] = load i32, i32* [[TMP63]], align 4
-// CHECK1-NEXT:    [[TMP70:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK1-NEXT:    [[TMP71:%.*]] = cmpxchg i32* [[TMP70]], i32 [[TMP62]], i32 [[TMP69]] monotonic monotonic, align 4
-// CHECK1-NEXT:    [[TMP72]] = extractvalue { i32, i1 } [[TMP71]], 0
-// CHECK1-NEXT:    [[TMP73:%.*]] = extractvalue { i32, i1 } [[TMP71]], 1
-// CHECK1-NEXT:    br i1 [[TMP73]], label [[ATOMIC_EXIT34:%.*]], label [[ATOMIC_CONT26]]
-// CHECK1:       atomic_exit34:
+// CHECK1-NEXT:    [[TMP50:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK1-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK1-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP51]] monotonic, align 4
+// CHECK1-NEXT:    br label [[ATOMIC_CONT:%.*]]
+// CHECK1:       atomic_cont:
+// CHECK1-NEXT:    [[TMP52:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END22]] ], [ [[TMP62:%.*]], [[COND_END27:%.*]] ]
+// CHECK1-NEXT:    [[TMP53:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
+// CHECK1-NEXT:    [[TMP54:%.*]] = bitcast i32 [[TMP52]] to float
+// CHECK1-NEXT:    store float [[TMP54]], float* [[TMP]], align 4
+// CHECK1-NEXT:    [[TMP55:%.*]] = load float, float* [[TMP]], align 4
+// CHECK1-NEXT:    [[TMP56:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK1-NEXT:    [[CMP24:%.*]] = fcmp olt float [[TMP55]], [[TMP56]]
+// CHECK1-NEXT:    br i1 [[CMP24]], label [[COND_TRUE25:%.*]], label [[COND_FALSE26:%.*]]
+// CHECK1:       cond.true25:
+// CHECK1-NEXT:    [[TMP57:%.*]] = load float, float* [[TMP]], align 4
+// CHECK1-NEXT:    br label [[COND_END27]]
+// CHECK1:       cond.false26:
+// CHECK1-NEXT:    [[TMP58:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK1-NEXT:    br label [[COND_END27]]
+// CHECK1:       cond.end27:
+// CHECK1-NEXT:    [[COND28:%.*]] = phi float [ [[TMP57]], [[COND_TRUE25]] ], [ [[TMP58]], [[COND_FALSE26]] ]
+// CHECK1-NEXT:    store float [[COND28]], float* [[ATOMIC_TEMP]], align 4
+// CHECK1-NEXT:    [[TMP59:%.*]] = load i32, i32* [[TMP53]], align 4
+// CHECK1-NEXT:    [[TMP60:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK1-NEXT:    [[TMP61:%.*]] = cmpxchg i32* [[TMP60]], i32 [[TMP52]], i32 [[TMP59]] monotonic monotonic, align 4
+// CHECK1-NEXT:    [[TMP62]] = extractvalue { i32, i1 } [[TMP61]], 0
+// CHECK1-NEXT:    [[TMP63:%.*]] = extractvalue { i32, i1 } [[TMP61]], 1
+// CHECK1-NEXT:    br i1 [[TMP63]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
+// CHECK1:       atomic_exit:
 // CHECK1-NEXT:    call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var)
 // CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK1:       .omp.reduction.default:
@@ -1100,11 +1080,9 @@ int main() {
 // CHECK2-NEXT:    [[T_VAR15:%.*]] = alloca float, align 4
 // CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8
 // CHECK2-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK2-NEXT:    [[REF_TMP16:%.*]] = alloca [[STRUCT_S]], align 4
 // CHECK2-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca float, align 4
 // CHECK2-NEXT:    [[TMP:%.*]] = alloca float, align 4
-// CHECK2-NEXT:    [[REF_TMP17:%.*]] = alloca [[STRUCT_S]], align 4
-// CHECK2-NEXT:    [[ATOMIC_TEMP27:%.*]] = alloca float, align 4
-// CHECK2-NEXT:    [[_TMP28:%.*]] = alloca float, align 4
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK2-NEXT:    store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8
@@ -1233,77 +1211,59 @@ int main() {
 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK2:       .omp.reduction.case2:
 // CHECK2-NEXT:    [[TMP43:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK2-NEXT:    [[TMP44:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK2-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP44]] monotonic, align 4
-// CHECK2-NEXT:    br label [[ATOMIC_CONT:%.*]]
-// CHECK2:       atomic_cont:
-// CHECK2-NEXT:    [[TMP45:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP53:%.*]], [[ATOMIC_CONT]] ]
-// CHECK2-NEXT:    [[TMP46:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
-// CHECK2-NEXT:    [[TMP47:%.*]] = bitcast i32 [[TMP45]] to float
-// CHECK2-NEXT:    store float [[TMP47]], float* [[TMP]], align 4
-// CHECK2-NEXT:    [[TMP48:%.*]] = load float, float* [[TMP]], align 4
-// CHECK2-NEXT:    [[TMP49:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK2-NEXT:    [[ADD15:%.*]] = fadd float [[TMP48]], [[TMP49]]
-// CHECK2-NEXT:    store float [[ADD15]], float* [[ATOMIC_TEMP]], align 4
-// CHECK2-NEXT:    [[TMP50:%.*]] = load i32, i32* [[TMP46]], align 4
-// CHECK2-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK2-NEXT:    [[TMP52:%.*]] = cmpxchg i32* [[TMP51]], i32 [[TMP45]], i32 [[TMP50]] monotonic monotonic, align 4
-// CHECK2-NEXT:    [[TMP53]] = extractvalue { i32, i1 } [[TMP52]], 0
-// CHECK2-NEXT:    [[TMP54:%.*]] = extractvalue { i32, i1 } [[TMP52]], 1
-// CHECK2-NEXT:    br i1 [[TMP54]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK2:       atomic_exit:
+// CHECK2-NEXT:    [[TMP44:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP43]] monotonic, align 4
 // CHECK2-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT:    [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
-// CHECK2-NEXT:    [[TMP55:%.*]] = bitcast %struct.S* [[TMP1]] to i8*
-// CHECK2-NEXT:    [[TMP56:%.*]] = bitcast %struct.S* [[CALL16]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP55]], i8* align 4 [[TMP56]], i64 4, i1 false)
+// CHECK2-NEXT:    [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
+// CHECK2-NEXT:    [[TMP45:%.*]] = bitcast %struct.S* [[TMP1]] to i8*
+// CHECK2-NEXT:    [[TMP46:%.*]] = bitcast %struct.S* [[CALL15]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false)
 // CHECK2-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
 // CHECK2-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT:    [[CALL18:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
-// CHECK2-NEXT:    [[TOBOOL19:%.*]] = fcmp une float [[CALL18]], 0.000000e+00
-// CHECK2-NEXT:    br i1 [[TOBOOL19]], label [[LAND_RHS20:%.*]], label [[LAND_END23:%.*]]
-// CHECK2:       land.rhs20:
-// CHECK2-NEXT:    [[CALL21:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
-// CHECK2-NEXT:    [[TOBOOL22:%.*]] = fcmp une float [[CALL21]], 0.000000e+00
-// CHECK2-NEXT:    br label [[LAND_END23]]
-// CHECK2:       land.end23:
-// CHECK2-NEXT:    [[TMP57:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL22]], [[LAND_RHS20]] ]
-// CHECK2-NEXT:    [[CONV24:%.*]] = uitofp i1 [[TMP57]] to float
-// CHECK2-NEXT:    call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP17]], float noundef [[CONV24]])
-// CHECK2-NEXT:    [[TMP58:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
-// CHECK2-NEXT:    [[TMP59:%.*]] = bitcast %struct.S* [[REF_TMP17]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP58]], i8* align 4 [[TMP59]], i64 4, i1 false)
-// CHECK2-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP17]]) #[[ATTR4]]
+// CHECK2-NEXT:    [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
+// CHECK2-NEXT:    [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00
+// CHECK2-NEXT:    br i1 [[TOBOOL18]], label [[LAND_RHS19:%.*]], label [[LAND_END22:%.*]]
+// CHECK2:       land.rhs19:
+// CHECK2-NEXT:    [[CALL20:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
+// CHECK2-NEXT:    [[TOBOOL21:%.*]] = fcmp une float [[CALL20]], 0.000000e+00
+// CHECK2-NEXT:    br label [[LAND_END22]]
+// CHECK2:       land.end22:
+// CHECK2-NEXT:    [[TMP47:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL21]], [[LAND_RHS19]] ]
+// CHECK2-NEXT:    [[CONV23:%.*]] = uitofp i1 [[TMP47]] to float
+// CHECK2-NEXT:    call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]], float noundef [[CONV23]])
+// CHECK2-NEXT:    [[TMP48:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
+// CHECK2-NEXT:    [[TMP49:%.*]] = bitcast %struct.S* [[REF_TMP16]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP48]], i8* align 4 [[TMP49]], i64 4, i1 false)
+// CHECK2-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR4]]
 // CHECK2-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT:    [[TMP60:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK2-NEXT:    [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK2-NEXT:    [[ATOMIC_LOAD25:%.*]] = load atomic i32, i32* [[TMP61]] monotonic, align 4
-// CHECK2-NEXT:    br label [[ATOMIC_CONT26:%.*]]
-// CHECK2:       atomic_cont26:
-// CHECK2-NEXT:    [[TMP62:%.*]] = phi i32 [ [[ATOMIC_LOAD25]], [[LAND_END23]] ], [ [[TMP72:%.*]], [[COND_END32:%.*]] ]
-// CHECK2-NEXT:    [[TMP63:%.*]] = bitcast float* [[ATOMIC_TEMP27]] to i32*
-// CHECK2-NEXT:    [[TMP64:%.*]] = bitcast i32 [[TMP62]] to float
-// CHECK2-NEXT:    store float [[TMP64]], float* [[_TMP28]], align 4
-// CHECK2-NEXT:    [[TMP65:%.*]] = load float, float* [[_TMP28]], align 4
-// CHECK2-NEXT:    [[TMP66:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK2-NEXT:    [[CMP29:%.*]] = fcmp olt float [[TMP65]], [[TMP66]]
-// CHECK2-NEXT:    br i1 [[CMP29]], label [[COND_TRUE30:%.*]], label [[COND_FALSE31:%.*]]
-// CHECK2:       cond.true30:
-// CHECK2-NEXT:    [[TMP67:%.*]] = load float, float* [[_TMP28]], align 4
-// CHECK2-NEXT:    br label [[COND_END32]]
-// CHECK2:       cond.false31:
-// CHECK2-NEXT:    [[TMP68:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK2-NEXT:    br label [[COND_END32]]
-// CHECK2:       cond.end32:
-// CHECK2-NEXT:    [[COND33:%.*]] = phi float [ [[TMP67]], [[COND_TRUE30]] ], [ [[TMP68]], [[COND_FALSE31]] ]
-// CHECK2-NEXT:    store float [[COND33]], float* [[ATOMIC_TEMP27]], align 4
-// CHECK2-NEXT:    [[TMP69:%.*]] = load i32, i32* [[TMP63]], align 4
-// CHECK2-NEXT:    [[TMP70:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK2-NEXT:    [[TMP71:%.*]] = cmpxchg i32* [[TMP70]], i32 [[TMP62]], i32 [[TMP69]] monotonic monotonic, align 4
-// CHECK2-NEXT:    [[TMP72]] = extractvalue { i32, i1 } [[TMP71]], 0
-// CHECK2-NEXT:    [[TMP73:%.*]] = extractvalue { i32, i1 } [[TMP71]], 1
-// CHECK2-NEXT:    br i1 [[TMP73]], label [[ATOMIC_EXIT34:%.*]], label [[ATOMIC_CONT26]]
-// CHECK2:       atomic_exit34:
+// CHECK2-NEXT:    [[TMP50:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK2-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK2-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP51]] monotonic, align 4
+// CHECK2-NEXT:    br label [[ATOMIC_CONT:%.*]]
+// CHECK2:       atomic_cont:
+// CHECK2-NEXT:    [[TMP52:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END22]] ], [ [[TMP62:%.*]], [[COND_END27:%.*]] ]
+// CHECK2-NEXT:    [[TMP53:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
+// CHECK2-NEXT:    [[TMP54:%.*]] = bitcast i32 [[TMP52]] to float
+// CHECK2-NEXT:    store float [[TMP54]], float* [[TMP]], align 4
+// CHECK2-NEXT:    [[TMP55:%.*]] = load float, float* [[TMP]], align 4
+// CHECK2-NEXT:    [[TMP56:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK2-NEXT:    [[CMP24:%.*]] = fcmp olt float [[TMP55]], [[TMP56]]
+// CHECK2-NEXT:    br i1 [[CMP24]], label [[COND_TRUE25:%.*]], label [[COND_FALSE26:%.*]]
+// CHECK2:       cond.true25:
+// CHECK2-NEXT:    [[TMP57:%.*]] = load float, float* [[TMP]], align 4
+// CHECK2-NEXT:    br label [[COND_END27]]
+// CHECK2:       cond.false26:
+// CHECK2-NEXT:    [[TMP58:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK2-NEXT:    br label [[COND_END27]]
+// CHECK2:       cond.end27:
+// CHECK2-NEXT:    [[COND28:%.*]] = phi float [ [[TMP57]], [[COND_TRUE25]] ], [ [[TMP58]], [[COND_FALSE26]] ]
+// CHECK2-NEXT:    store float [[COND28]], float* [[ATOMIC_TEMP]], align 4
+// CHECK2-NEXT:    [[TMP59:%.*]] = load i32, i32* [[TMP53]], align 4
+// CHECK2-NEXT:    [[TMP60:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK2-NEXT:    [[TMP61:%.*]] = cmpxchg i32* [[TMP60]], i32 [[TMP52]], i32 [[TMP59]] monotonic monotonic, align 4
+// CHECK2-NEXT:    [[TMP62]] = extractvalue { i32, i1 } [[TMP61]], 0
+// CHECK2-NEXT:    [[TMP63:%.*]] = extractvalue { i32, i1 } [[TMP61]], 1
+// CHECK2-NEXT:    br i1 [[TMP63]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
+// CHECK2:       atomic_exit:
 // CHECK2-NEXT:    call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var)
 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK2:       .omp.reduction.default:
@@ -1868,8 +1828,6 @@ int main() {
 // CHECK3-NEXT:    [[G:%.*]] = alloca double, align 8
 // CHECK3-NEXT:    [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-// CHECK3-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca double, align 8
-// CHECK3-NEXT:    [[TMP:%.*]] = alloca double, align 8
 // CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
@@ -1933,23 +1891,7 @@ int main() {
 // CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK3:       .omp.reduction.case2:
 // CHECK3-NEXT:    [[TMP17:%.*]] = load double, double* [[G]], align 8
-// CHECK3-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* bitcast (double* @g to i64*) monotonic, align 8
-// CHECK3-NEXT:    br label [[ATOMIC_CONT:%.*]]
-// CHECK3:       atomic_cont:
-// CHECK3-NEXT:    [[TMP18:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP25:%.*]], [[ATOMIC_CONT]] ]
-// CHECK3-NEXT:    [[TMP19:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64*
-// CHECK3-NEXT:    [[TMP20:%.*]] = bitcast i64 [[TMP18]] to double
-// CHECK3-NEXT:    store double [[TMP20]], double* [[TMP]], align 8
-// CHECK3-NEXT:    [[TMP21:%.*]] = load double, double* [[TMP]], align 8
-// CHECK3-NEXT:    [[TMP22:%.*]] = load double, double* [[G]], align 8
-// CHECK3-NEXT:    [[ADD2:%.*]] = fadd double [[TMP21]], [[TMP22]]
-// CHECK3-NEXT:    store double [[ADD2]], double* [[ATOMIC_TEMP]], align 8
-// CHECK3-NEXT:    [[TMP23:%.*]] = load i64, i64* [[TMP19]], align 8
-// CHECK3-NEXT:    [[TMP24:%.*]] = cmpxchg i64* bitcast (double* @g to i64*), i64 [[TMP18]], i64 [[TMP23]] monotonic monotonic, align 8
-// CHECK3-NEXT:    [[TMP25]] = extractvalue { i64, i1 } [[TMP24]], 0
-// CHECK3-NEXT:    [[TMP26:%.*]] = extractvalue { i64, i1 } [[TMP24]], 1
-// CHECK3-NEXT:    br i1 [[TMP26]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK3:       atomic_exit:
+// CHECK3-NEXT:    [[TMP18:%.*]] = atomicrmw fadd double* @g, double [[TMP17]] monotonic, align 8
 // CHECK3-NEXT:    call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], [8 x i32]* @.gomp_critical_user_.reduction.var)
 // CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK3:       .omp.reduction.default:
@@ -2017,8 +1959,6 @@ int main() {
 // CHECK4-NEXT:    [[G:%.*]] = alloca double, align 8
 // CHECK4-NEXT:    [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double }>, align 8
 // CHECK4-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-// CHECK4-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca double, align 8
-// CHECK4-NEXT:    [[TMP:%.*]] = alloca double, align 8
 // CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
@@ -2099,23 +2039,7 @@ int main() {
 // CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK4:       .omp.reduction.case2:
 // CHECK4-NEXT:    [[TMP22:%.*]] = load double, double* [[G]], align 8
-// CHECK4-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* bitcast (double* @g to i64*) monotonic, align 8
-// CHECK4-NEXT:    br label [[ATOMIC_CONT:%.*]]
-// CHECK4:       atomic_cont:
-// CHECK4-NEXT:    [[TMP23:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP30:%.*]], [[ATOMIC_CONT]] ]
-// CHECK4-NEXT:    [[TMP24:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64*
-// CHECK4-NEXT:    [[TMP25:%.*]] = bitcast i64 [[TMP23]] to double
-// CHECK4-NEXT:    store double [[TMP25]], double* [[TMP]], align 8
-// CHECK4-NEXT:    [[TMP26:%.*]] = load double, double* [[TMP]], align 8
-// CHECK4-NEXT:    [[TMP27:%.*]] = load double, double* [[G]], align 8
-// CHECK4-NEXT:    [[ADD2:%.*]] = fadd double [[TMP26]], [[TMP27]]
-// CHECK4-NEXT:    store double [[ADD2]], double* [[ATOMIC_TEMP]], align 8
-// CHECK4-NEXT:    [[TMP28:%.*]] = load i64, i64* [[TMP24]], align 8
-// CHECK4-NEXT:    [[TMP29:%.*]] = cmpxchg i64* bitcast (double* @g to i64*), i64 [[TMP23]], i64 [[TMP28]] monotonic monotonic, align 8
-// CHECK4-NEXT:    [[TMP30]] = extractvalue { i64, i1 } [[TMP29]], 0
-// CHECK4-NEXT:    [[TMP31:%.*]] = extractvalue { i64, i1 } [[TMP29]], 1
-// CHECK4-NEXT:    br i1 [[TMP31]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK4:       atomic_exit:
+// CHECK4-NEXT:    [[TMP23:%.*]] = atomicrmw fadd double* @g, double [[TMP22]] monotonic, align 8
 // CHECK4-NEXT:    call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], [8 x i32]* @.gomp_critical_user_.reduction.var)
 // CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK4:       .omp.reduction.default:
diff --git a/clang/test/Preprocessor/avr-common.c b/clang/test/Preprocessor/avr-common.c
index 8a03c1d..979dc52 100644
--- a/clang/test/Preprocessor/avr-common.c
+++ b/clang/test/Preprocessor/avr-common.c
@@ -1,6 +1,11 @@
-// RUN: %clang_cc1 -E -dM -triple avr-unknown-unknown /dev/null | FileCheck -match-full-lines %s
+// RUN: %clang_cc1 -E -dM -triple avr-unknown-unknown -target-cpu attiny13 /dev/null | FileCheck -match-full-lines --check-prefixes=CHECK,AVR %s
+// RUN: %clang_cc1 -E -dM -triple avr-unknown-unknown -target-cpu attiny4 /dev/null | FileCheck -match-full-lines --check-prefixes=CHECK,TINY %s
 
 // CHECK: #define AVR 1
 // CHECK: #define __AVR 1
+
+// TINY: #define __AVR_TINY__ 1
+// AVR-NOT: __AVR_TINY__
+
 // CHECK: #define __AVR__ 1
 // CHECK: #define __ELF__ 1
diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c
index a729226..10c95a1 100644
--- a/clang/test/Sema/builtins-reduction-math.c
+++ b/clang/test/Sema/builtins-reduction-math.c
@@ -36,6 +36,23 @@ void test_builtin_reduce_min(int i, float4 v, int3 iv) {
   // expected-error@-1 {{1st argument must be a vector type (was 'int')}}
 }
 
+void test_builtin_reduce_add(int i, float4 v, int3 iv) {
+  struct Foo s = __builtin_reduce_add(iv);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_reduce_add();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_reduce_add(iv, iv);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  i = __builtin_reduce_add(i);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
+
+  i = __builtin_reduce_add(v);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
+}
+
 void test_builtin_reduce_xor(int i, float4 v, int3 iv) {
   struct Foo s = __builtin_reduce_xor(iv);
   // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
diff --git a/clang/test/SemaCXX/aarch64-sve-vector-conditional-op.cpp b/clang/test/SemaCXX/aarch64-sve-vector-conditional-op.cpp
new file mode 100644
index 0000000..a8fbf9e
--- /dev/null
+++ b/clang/test/SemaCXX/aarch64-sve-vector-conditional-op.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -verify -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+void cond(svint8_t i8, svint16_t i16, svint32_t i32, svint64_t i64,
+          svuint8_t u8, svuint16_t u16, svuint32_t u32, svuint64_t u64,
+          svfloat16_t f16, svfloat32_t f32, svfloat64_t f64,
+          svbool_t b) {
+  (void) i8 < i8 ? i16 : i16; // expected-error{{invalid operands to binary expression}}
+  (void) i8 < i8 ? i32 : i32; // expected-error{{invalid operands to binary expression}}
+  (void) i8 < i8 ? i64 : i64; // expected-error{{invalid operands to binary expression}}
+
+  (void) i16 < i16 ? i16 : i8; // expected-error{{invalid operands to binary expression}}
+  (void) i16 < i16 ? i16 : i32; // expected-error{{invalid operands to binary expression}}
+  (void) i16 < i16 ? i16 : i64; // expected-error{{invalid operands to binary expression}}
+
+  (void) i16 < i16 ? i8 : i16; // expected-error{{invalid operands to binary expression}}
+  (void) i16 < i16 ? i32 : i16; // expected-error{{invalid operands to binary expression}}
+  (void) i16 < i16 ? i64 : i16; // expected-error{{invalid operands to binary expression}}
+}
+\ No newline at end of file
diff --git a/clang/test/SemaCXX/ms-friend-function-decl.cpp b/clang/test/SemaCXX/ms-friend-function-decl.cpp
new file mode 100644
index 0000000..d146305
--- /dev/null
+++ b/clang/test/SemaCXX/ms-friend-function-decl.cpp
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -std=c++03 -fms-compatibility -fsyntax-only -verify %s
+// RUN: %clang_cc1 -std=c++17 -fms-compatibility -fsyntax-only -verify %s
+// RUN: %clang_cc1 -std=c++20 -fms-compatibility -fsyntax-only -verify=modern %s
+#if __cplusplus < 202002L
+// expected-no-diagnostics
+#endif
+
+namespace ns {
+
+class C {
+public:
+  template <typename T>
+  friend void funtemp();
+
+  friend void fun();
+
+  void test() {
+    ::ns::fun(); // modern-error {{no member named 'fun' in namespace 'ns'}}
+
+    // modern-error@+3 {{no member named 'funtemp' in namespace 'ns'}}
+    // modern-error@+2 {{expected '(' for function-style cast or type construction}}
+    // modern-error@+1 {{expected expression}}
+    ::ns::funtemp<int>();
+  }
+};
+
+void fun() {
+}
+
+template <typename T>
+void funtemp() {}
+
+} // namespace ns
+
+class Glob {
+public:
+  friend void funGlob();
+
+  void test() {
+    funGlob(); // modern-error {{use of undeclared identifier 'funGlob'}}
+  }
+};
+
+void funGlob() {
+}
diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp
index 856010c..2cda013 100644
--- a/clang/unittests/AST/ASTImporterTest.cpp
+++ b/clang/unittests/AST/ASTImporterTest.cpp
@@ -2658,7 +2658,10 @@ TEST_P(ImportFriendFunctions, Lookup) {
       getTuDecl("struct X { friend void f(); };", Lang_CXX03, "input0.cc");
   auto *FromD = FirstDeclMatcher<FunctionDecl>().match(FromTU, FunctionPattern);
   ASSERT_TRUE(FromD->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend));
-  ASSERT_FALSE(FromD->isInIdentifierNamespace(Decl::IDNS_Ordinary));
+  // Before CXX20, MSVC treats friend function declarations as function
+  // declarations
+  ASSERT_EQ(FromTU->getLangOpts().MSVCCompat,
+            FromD->isInIdentifierNamespace(Decl::IDNS_Ordinary));
   {
     auto FromName = FromD->getDeclName();
     auto *Class = FirstDeclMatcher<CXXRecordDecl>().match(FromTU, ClassPattern);
@@ -2702,7 +2705,10 @@ TEST_P(ImportFriendFunctions, LookupWithProtoAfter) {
   auto *FromNormal =
       LastDeclMatcher<FunctionDecl>().match(FromTU, FunctionPattern);
   ASSERT_TRUE(FromFriend->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend));
-  ASSERT_FALSE(FromFriend->isInIdentifierNamespace(Decl::IDNS_Ordinary));
+  // Before CXX20, MSVC treats friend function declarations as function
+  // declarations
+  ASSERT_EQ(FromTU->getLangOpts().MSVCCompat,
+            FromFriend->isInIdentifierNamespace(Decl::IDNS_Ordinary));
   ASSERT_FALSE(FromNormal->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend));
   ASSERT_TRUE(FromNormal->isInIdentifierNamespace(Decl::IDNS_Ordinary));
 
@@ -2793,7 +2799,10 @@ TEST_P(ImportFriendFunctions, ImportFriendChangesLookup) {
 
   ASSERT_TRUE(FromNormalF->isInIdentifierNamespace(Decl::IDNS_Ordinary));
   ASSERT_FALSE(FromNormalF->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend));
-  ASSERT_FALSE(FromFriendF->isInIdentifierNamespace(Decl::IDNS_Ordinary));
+  // Before CXX20, MSVC treats friend function declarations as function
+  // declarations
+  ASSERT_EQ(FromFriendTU->getLangOpts().MSVCCompat,
+            FromFriendF->isInIdentifierNamespace(Decl::IDNS_Ordinary));
   ASSERT_TRUE(FromFriendF->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend));
   auto LookupRes = FromNormalTU->noload_lookup(FromNormalName);
   ASSERT_TRUE(LookupRes.isSingleResult());
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index c685fa1..37f3269 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -3336,6 +3336,14 @@ TEST_F(FormatTest, UnderstandsAccessSpecifiers) {
   verifyFormat("if (public == private)\n");
   verifyFormat("void foo(public, private)");
   verifyFormat("public::foo();");
+
+  verifyFormat("class A {\n"
+               "public:\n"
+               "  std::unique_ptr<int *[]> b() { return nullptr; }\n"
+               "\n"
+               "private:\n"
+               "  int c;\n"
+               "};");
 }
 
 TEST_F(FormatTest, SeparatesLogicalBlocks) {
@@ -16022,6 +16030,10 @@ TEST_F(FormatTest, AlignConsecutiveMacros) {
                "#define ccc  (5)",
                Style);
 
+  verifyFormat("#define true  1\n"
+               "#define false 0",
+               Style);
+
   verifyFormat("#define f(x)         (x * x)\n"
                "#define fff(x, y, z) (x * y + z)\n"
                "#define ffff(x, y)   (x - y)",
@@ -21487,6 +21499,7 @@ TEST_F(FormatTest, FormatsLambdas) {
                "  bar([]() {} // Did not respect SpacesBeforeTrailingComments\n"
                "  );\n"
                "}");
+  verifyFormat("auto k = *[](int *j) { return j; }(&i);");
 
   // Lambdas created through weird macros.
   verifyFormat("void f() {\n"
diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
index b7bf2ba..27f68ea 100644
--- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
@@ -153,6 +153,7 @@ endmacro()
 
 macro(detect_target_arch)
   check_symbol_exists(__arm__ "" __ARM)
+  check_symbol_exists(__AVR__ "" __AVR)
   check_symbol_exists(__aarch64__ "" __AARCH64)
   check_symbol_exists(__x86_64__ "" __X86_64)
   check_symbol_exists(__i386__ "" __I386)
@@ -170,6 +171,8 @@ macro(detect_target_arch)
   check_symbol_exists(__ve__ "" __VE)
   if(__ARM)
     add_default_target_arch(arm)
+  elseif(__AVR)
+    add_default_target_arch(avr)
   elseif(__AARCH64)
     add_default_target_arch(aarch64)
   elseif(__X86_64)
diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
index 2576667..8a621956 100644
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -234,6 +234,8 @@ macro(test_targets)
         test_target_arch(armhf "" "-march=armv7-a" "-mfloat-abi=hard")
         test_target_arch(armv6m "" "-march=armv6m" "-mfloat-abi=soft")
       endif()
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "avr")
+      test_target_arch(avr "__AVR__" "--target=avr")
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "aarch32")
       test_target_arch(aarch32 "" "-march=armv8-a")
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "aarch64")
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 5ea5cc0..9cbadf3 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -38,6 +38,7 @@ asm(\"cas w0, w1, [x2]\");
 
 set(ARM64 aarch64)
 set(ARM32 arm armhf armv6m armv7m armv7em armv7 armv7s armv7k armv8m.main armv8.1m.main)
+set(AVR avr)
 set(HEXAGON hexagon)
 set(X86 i386)
 set(X86_64 x86_64)
@@ -60,7 +61,7 @@ if(APPLE)
 endif()
 
 set(ALL_BUILTIN_SUPPORTED_ARCH
-  ${X86} ${X86_64} ${ARM32} ${ARM64}
+  ${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR}
   ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64}
   ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
   ${WASM32} ${WASM64} ${VE})
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 17e5156..8cd8059 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -203,8 +203,10 @@ file(WRITE ${SIMPLE_SOURCE} "#include <stdlib.h>\n#include <stdio.h>\nint main()
 
 # Detect whether the current target platform is 32-bit or 64-bit, and setup
 # the correct commandline flags needed to attempt to target 32-bit and 64-bit.
+# AVR and MSP430 are omitted since they have 16-bit pointers.
 if (NOT CMAKE_SIZEOF_VOID_P EQUAL 4 AND
-    NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+    NOT CMAKE_SIZEOF_VOID_P EQUAL 8 AND
+    NOT ${arch} MATCHES "avr|msp430")
   message(FATAL_ERROR "Please use architecture with 4 or 8 byte pointers.")
 endif()
 
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index d9c317e..3146689 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -567,6 +567,13 @@ set(armv7em_SOURCES ${arm_SOURCES})
 set(armv8m.main_SOURCES ${arm_SOURCES})
 set(armv8.1m.main_SOURCES ${arm_SOURCES})
 
+# 8-bit AVR MCU
+set(avr_SOURCES
+  avr/mulqi3.S
+  avr/mulhi3.S
+  avr/exit.S
+)
+
 # hexagon arch
 set(hexagon_SOURCES
   hexagon/common_entry_exit_abi1.S
diff --git a/compiler-rt/lib/builtins/avr/exit.S b/compiler-rt/lib/builtins/avr/exit.S
new file mode 100644
index 0000000..3cd9c5d
--- /dev/null
+++ b/compiler-rt/lib/builtins/avr/exit.S
@@ -0,0 +1,18 @@
+//===------------ exit.S - global terminator for AVR ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+	.text
+	.align 2
+
+	.globl _exit
+	.type  _exit, @function
+
+_exit:
+	cli                 ; Disable all interrupts.
+__stop_program:
+	rjmp __stop_program ; Fall into an infinite loop.
diff --git a/compiler-rt/lib/builtins/avr/mulhi3.S b/compiler-rt/lib/builtins/avr/mulhi3.S
new file mode 100644
index 0000000..97e9bd1
--- /dev/null
+++ b/compiler-rt/lib/builtins/avr/mulhi3.S
@@ -0,0 +1,58 @@
+//===------------ mulhi3.S - int16 multiplication -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The corresponding C code is something like:
+//
+// int __mulhi3(int A, int B) {
+//   int S = 0;
+//   while (A != 0) {
+//     if (A & 1)
+//       S += B;
+//     A = ((unsigned int) A) >> 1;
+//     B <<= 1;
+//   }
+//   return S;
+// }
+//
+//===----------------------------------------------------------------------===//
+
+	.text
+	.align 2
+
+	.globl __mulhi3
+	.type  __mulhi3, @function
+
+__mulhi3:
+	eor    r28, r28
+	eor    r20, r20
+	eor    r21, r21         ; Initialize the result to 0: `S = 0;`.
+
+__mulhi3_loop:
+	cp     r24, r28
+	cpc    r25, r28         ; `while (A != 0) { ... }`
+	breq   __mulhi3_end     ; End the loop if A is 0.
+
+	mov    r29, r24
+	andi   r29, 1           ; `if (A & 1) { ... }`
+	breq   __mulhi3_loop_a  ; Omit the accumulation (`S += B;`) if  A's LSB is 0.
+
+	add    r20, r22
+	adc    r21, r23         ; Do the accumulation: `S += B;`.
+
+__mulhi3_loop_a:
+	lsr    r25
+	ror    r24              ; `A = ((unsigned int) A) >> 1;`.
+	lsl    r22
+	rol    r23              ; `B <<= 1;`
+
+	rjmp   __mulhi3_loop
+
+__mulhi3_end:
+	mov    r24, r20
+	mov    r25, r21
+	ret
diff --git a/compiler-rt/lib/builtins/avr/mulqi3.S b/compiler-rt/lib/builtins/avr/mulqi3.S
new file mode 100644
index 0000000..be0fa77
--- /dev/null
+++ b/compiler-rt/lib/builtins/avr/mulqi3.S
@@ -0,0 +1,31 @@
+//===------------ mulhi3.S - int8 multiplication --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The corresponding C code is something like:
+//
+// int __mulqi3(char A, char B) {
+//   return __mulhi3((int) A, (int) B);
+// }
+//
+//===----------------------------------------------------------------------===//
+
+	.text
+	.align 2
+
+	.globl __mulqi3
+	.type  __mulqi3, @function
+
+__mulqi3:
+	mov    r25, r24
+	lsl    r25
+	sbc    r25, r25         ; Promote A from char to int: `(int) A`.
+	mov    r23, r22
+	lsl    r23
+	sbc    r23, r23         ; Promote B from char to int: `(int) B`.
+	rcall  __mulhi3         ; `__mulhi3((int) A, (int) B);`.
+	ret
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
index 6cfce8a..8fd3985 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
@@ -98,15 +98,19 @@ void MaybeStartBackgroudThread() {
 }
 
 #  if !SANITIZER_START_BACKGROUND_THREAD_IN_ASAN_INTERNAL
+#    ifdef __clang__
 #    pragma clang diagnostic push
 // We avoid global-constructors to be sure that globals are ready when
 // sanitizers need them. This can happend before global constructors executed.
 // Here we don't mind if thread is started on later stages.
 #    pragma clang diagnostic ignored "-Wglobal-constructors"
+#    endif
 static struct BackgroudThreadStarted {
   BackgroudThreadStarted() { MaybeStartBackgroudThread(); }
 } background_thread_strarter UNUSED;
+#    ifdef __clang__
 #    pragma clang diagnostic pop
+#    endif
 #  endif
 #else
 void MaybeStartBackgroudThread() {}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
index 924578b..ff65069 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
@@ -261,6 +261,8 @@ typedef u64 tid_t;
 
 #if __has_cpp_attribute(clang::fallthrough)
 #  define FALLTHROUGH [[clang::fallthrough]]
+#elif __has_cpp_attribute(fallthrough)
+#  define FALLTHROUGH [[fallthrough]]
 #else
 #  define FALLTHROUGH
 #endif
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index f591a5c..8e144a4 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -233,7 +233,7 @@ uptr internal_close(fd_t fd) {
 }
 
 uptr internal_open(const char *filename, int flags) {
-#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
+#    if SANITIZER_LINUX
   return internal_syscall(SYSCALL(openat), AT_FDCWD, (uptr)filename, flags);
 #else
   return internal_syscall(SYSCALL(open), (uptr)filename, flags);
@@ -241,7 +241,7 @@ uptr internal_open(const char *filename, int flags) {
 }
 
 uptr internal_open(const char *filename, int flags, u32 mode) {
-#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
+#    if SANITIZER_LINUX
   return internal_syscall(SYSCALL(openat), AT_FDCWD, (uptr)filename, flags,
                           mode);
 #else
@@ -342,50 +342,46 @@ static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) {
 uptr internal_stat(const char *path, void *buf) {
 #if SANITIZER_FREEBSD
   return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, (uptr)buf, 0);
-#elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
+#    elif SANITIZER_LINUX
+#      if SANITIZER_WORDSIZE == 64
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf,
                           0);
-#elif SANITIZER_LINUX_USES_64BIT_SYSCALLS
-# if defined(__mips64)
-  // For mips64, stat syscall fills buffer in the format of kernel_stat
-  struct kernel_stat kbuf;
-  int res = internal_syscall(SYSCALL(stat), path, &kbuf);
-  kernel_stat_to_stat(&kbuf, (struct stat *)buf);
+#      else
+  struct stat64 buf64;
+  int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
+                             (uptr)&buf64, 0);
+  stat64_to_stat(&buf64, (struct stat *)buf);
   return res;
-# else
-  return internal_syscall(SYSCALL(stat), (uptr)path, (uptr)buf);
-# endif
-#else
+#      endif
+#    else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(stat64), path, &buf64);
   stat64_to_stat(&buf64, (struct stat *)buf);
   return res;
-#endif
+#    endif
 }
 
 uptr internal_lstat(const char *path, void *buf) {
 #if SANITIZER_FREEBSD
   return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, (uptr)buf,
                           AT_SYMLINK_NOFOLLOW);
-#elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
+#    elif SANITIZER_LINUX
+#      if defined(_LP64)
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf,
                           AT_SYMLINK_NOFOLLOW);
-#elif SANITIZER_LINUX_USES_64BIT_SYSCALLS
-# if SANITIZER_MIPS64
-  // For mips64, lstat syscall fills buffer in the format of kernel_stat
-  struct kernel_stat kbuf;
-  int res = internal_syscall(SYSCALL(lstat), path, &kbuf);
-  kernel_stat_to_stat(&kbuf, (struct stat *)buf);
+#      else
+  struct stat64 buf64;
+  int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
+                             (uptr)&buf64, AT_SYMLINK_NOFOLLOW);
+  stat64_to_stat(&buf64, (struct stat *)buf);
   return res;
-# else
-  return internal_syscall(SYSCALL(lstat), (uptr)path, (uptr)buf);
-# endif
-#else
+#      endif
+#    else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(lstat64), path, &buf64);
   stat64_to_stat(&buf64, (struct stat *)buf);
   return res;
-#endif
+#    endif
 }
 
 uptr internal_fstat(fd_t fd, void *buf) {
@@ -419,7 +415,7 @@ uptr internal_dup(int oldfd) {
 }
 
 uptr internal_dup2(int oldfd, int newfd) {
-#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
+#    if SANITIZER_LINUX
   return internal_syscall(SYSCALL(dup3), oldfd, newfd, 0);
 #else
   return internal_syscall(SYSCALL(dup2), oldfd, newfd);
@@ -427,7 +423,7 @@ uptr internal_dup2(int oldfd, int newfd) {
 }
 
 uptr internal_readlink(const char *path, char *buf, uptr bufsize) {
-#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
+#    if SANITIZER_LINUX
   return internal_syscall(SYSCALL(readlinkat), AT_FDCWD, (uptr)path, (uptr)buf,
                           bufsize);
 #else
@@ -436,7 +432,7 @@ uptr internal_readlink(const char *path, char *buf, uptr bufsize) {
 }
 
 uptr internal_unlink(const char *path) {
-#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
+#    if SANITIZER_LINUX
   return internal_syscall(SYSCALL(unlinkat), AT_FDCWD, (uptr)path, 0);
 #else
   return internal_syscall(SYSCALL(unlink), (uptr)path);
@@ -447,12 +443,12 @@ uptr internal_rename(const char *oldpath, const char *newpath) {
 #if defined(__riscv) && defined(__linux__)
   return internal_syscall(SYSCALL(renameat2), AT_FDCWD, (uptr)oldpath, AT_FDCWD,
                           (uptr)newpath, 0);
-#elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
+#    elif SANITIZER_LINUX
   return internal_syscall(SYSCALL(renameat), AT_FDCWD, (uptr)oldpath, AT_FDCWD,
                           (uptr)newpath);
-#else
+#    else
   return internal_syscall(SYSCALL(rename), (uptr)oldpath, (uptr)newpath);
-#endif
+#    endif
 }
 
 uptr internal_sched_yield() {
@@ -489,11 +485,7 @@ bool FileExists(const char *filename) {
   if (ShouldMockFailureToOpen(filename))
     return false;
   struct stat st;
-#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
-  if (internal_syscall(SYSCALL(newfstatat), AT_FDCWD, filename, &st, 0))
-#else
   if (internal_stat(filename, &st))
-#endif
     return false;
   // Sanity check: filename is a regular file.
   return S_ISREG(st.st_mode);
@@ -501,11 +493,7 @@ bool FileExists(const char *filename) {
 
 bool DirExists(const char *path) {
   struct stat st;
-#  if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
-  if (internal_syscall(SYSCALL(newfstatat), AT_FDCWD, path, &st, 0))
-#  else
   if (internal_stat(path, &st))
-#  endif
     return false;
   return S_ISDIR(st.st_mode);
 }
@@ -709,17 +697,17 @@ void FutexWake(atomic_uint32_t *p, u32 count) {
 // Not used
 #else
 struct linux_dirent {
-#if SANITIZER_X32 || defined(__aarch64__) || SANITIZER_RISCV64
+#    if SANITIZER_X32 || SANITIZER_LINUX
   u64 d_ino;
   u64 d_off;
-#else
+#    else
   unsigned long      d_ino;
   unsigned long      d_off;
-#endif
+#    endif
   unsigned short     d_reclen;
-#if defined(__aarch64__) || SANITIZER_RISCV64
+#    if SANITIZER_LINUX
   unsigned char      d_type;
-#endif
+#    endif
   char               d_name[256];
 };
 #endif
@@ -755,11 +743,11 @@ int internal_dlinfo(void *handle, int request, void *p) {
 uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count) {
 #if SANITIZER_FREEBSD
   return internal_syscall(SYSCALL(getdirentries), fd, (uptr)dirp, count, NULL);
-#elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
+#    elif SANITIZER_LINUX
   return internal_syscall(SYSCALL(getdents64), fd, (uptr)dirp, count);
-#else
+#    else
   return internal_syscall(SYSCALL(getdents), fd, (uptr)dirp, count);
-#endif
+#    endif
 }
 
 uptr internal_lseek(fd_t fd, OFF_T offset, int whence) {
@@ -777,11 +765,15 @@ uptr internal_sigaltstack(const void *ss, void *oss) {
 }
 
 int internal_fork() {
-#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
+#    if SANITIZER_LINUX
+#      if SANITIZER_S390
+  return internal_syscall(SYSCALL(clone), 0, SIGCHLD);
+#      else
   return internal_syscall(SYSCALL(clone), SIGCHLD, 0);
-#else
+#      endif
+#    else
   return internal_syscall(SYSCALL(fork));
-#endif
+#    endif
 }
 
 #if SANITIZER_FREEBSD
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h
index 4f2c551..8fe0d83 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h
@@ -298,18 +298,6 @@
 #  define SANITIZER_SIGN_EXTENDED_ADDRESSES 0
 #endif
 
-// The AArch64 and RISC-V linux ports use the canonical syscall set as
-// mandated by the upstream linux community for all new ports. Other ports
-// may still use legacy syscalls.
-#ifndef SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
-#  if (defined(__aarch64__) || defined(__riscv) || defined(__hexagon__)) && \
-      SANITIZER_LINUX
-#    define SANITIZER_USES_CANONICAL_LINUX_SYSCALLS 1
-#  else
-#    define SANITIZER_USES_CANONICAL_LINUX_SYSCALLS 0
-#  endif
-#endif
-
 // udi16 syscalls can only be used when the following conditions are
 // met:
 // * target is one of arm32, x86-32, sparc32, sh or m68k
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index 8e40ea7..60ca963 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -1767,7 +1767,8 @@ TSAN_INTERCEPTOR(int, listen, int fd, int backlog) {
 
 TSAN_INTERCEPTOR(int, close, int fd) {
   SCOPED_INTERCEPTOR_RAW(close, fd);
-  FdClose(thr, pc, fd);
+  if (!in_symbolizer())
+    FdClose(thr, pc, fd);
   return REAL(close)(fd);
 }
 
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 93cb936..c374012 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -254,8 +254,10 @@ genOMP(Fortran::lower::AbstractConverter &converter,
     if (const auto &ifClause =
             std::get_if<Fortran::parser::OmpClause::If>(&clause.u)) {
       auto &expr = std::get<Fortran::parser::ScalarLogicalExpr>(ifClause->v.t);
-      ifClauseOperand = fir::getBase(
+      mlir::Value ifVal = fir::getBase(
           converter.genExprValue(*Fortran::semantics::GetExpr(expr), stmtCtx));
+      ifClauseOperand = firOpBuilder.createConvert(
+          currentLocation, firOpBuilder.getI1Type(), ifVal);
     } else if (const auto &numThreadsClause =
                    std::get_if<Fortran::parser::OmpClause::NumThreads>(
                        &clause.u)) {
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index cf56efb..83acaba 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -1806,6 +1806,7 @@ CHECK_SIMPLE_CLAUSE(MemoryOrder, OMPC_memory_order)
 CHECK_SIMPLE_CLAUSE(Bind, OMPC_bind)
 CHECK_SIMPLE_CLAUSE(Align, OMPC_align)
 CHECK_SIMPLE_CLAUSE(Compare, OMPC_compare)
+CHECK_SIMPLE_CLAUSE(CancellationConstructType, OMPC_cancellation_construct_type)
 
 CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize)
 CHECK_REQ_SCALAR_INT_CLAUSE(NumTasks, OMPC_num_tasks)
diff --git a/flang/lib/Semantics/check-return.cpp b/flang/lib/Semantics/check-return.cpp
index b28c82b..ec2600b 100644
--- a/flang/lib/Semantics/check-return.cpp
+++ b/flang/lib/Semantics/check-return.cpp
@@ -36,7 +36,8 @@ void ReturnStmtChecker::Leave(const parser::ReturnStmt &returnStmt) {
             IsFunction(*subprogramScope->GetSymbol()))) {
       context_.Say(
           "RETURN with expression is only allowed in SUBROUTINE subprogram"_err_en_US);
-    } else if (context_.ShouldWarn(common::LanguageFeature::ProgramReturn)) {
+    } else if (subprogramScope->kind() == Scope::Kind::MainProgram &&
+        context_.ShouldWarn(common::LanguageFeature::ProgramReturn)) {
       context_.Say("RETURN should not appear in a main program"_port_en_US);
     }
   }
diff --git a/flang/test/Lower/OpenMP/parallel.f90 b/flang/test/Lower/OpenMP/parallel.f90
index 849db5c3..70dcee4 100644
--- a/flang/test/Lower/OpenMP/parallel.f90
+++ b/flang/test/Lower/OpenMP/parallel.f90
@@ -15,8 +15,13 @@ end subroutine parallel_simple
 !===============================================================================
 
 !FIRDialect-LABEL: func @_QPparallel_if
-subroutine parallel_if(alpha)
+subroutine parallel_if(alpha, beta, gamma)
    integer, intent(in) :: alpha
+   logical, intent(in) :: beta
+   logical(1) :: logical1
+   logical(2) :: logical2
+   logical(4) :: logical4
+   logical(8) :: logical8
 
    !OMPDialect: omp.parallel if(%{{.*}} : i1) {
    !$omp parallel if(alpha .le. 0)
@@ -46,6 +51,41 @@ subroutine parallel_if(alpha)
    !OMPDialect: omp.terminator
    !$omp end parallel
 
+   !OMPDialect: omp.parallel if(%{{.*}} : i1) {
+   !$omp parallel if(beta)
+   !FIRDialect: fir.call
+   call f1()
+   !OMPDialect: omp.terminator
+   !$omp end parallel
+
+   !OMPDialect: omp.parallel if(%{{.*}} : i1) {
+   !$omp parallel if(logical1)
+   !FIRDialect: fir.call
+   call f1()
+   !OMPDialect: omp.terminator
+   !$omp end parallel
+
+   !OMPDialect: omp.parallel if(%{{.*}} : i1) {
+   !$omp parallel if(logical2)
+   !FIRDialect: fir.call
+   call f1()
+   !OMPDialect: omp.terminator
+   !$omp end parallel
+
+   !OMPDialect: omp.parallel if(%{{.*}} : i1) {
+   !$omp parallel if(logical4)
+   !FIRDialect: fir.call
+   call f1()
+   !OMPDialect: omp.terminator
+   !$omp end parallel
+
+   !OMPDialect: omp.parallel if(%{{.*}} : i1) {
+   !$omp parallel if(logical8)
+   !FIRDialect: fir.call
+   call f1()
+   !OMPDialect: omp.terminator
+   !$omp end parallel
+
 end subroutine parallel_if
 
 !===============================================================================
diff --git a/flang/test/Lower/OpenMP/taskwait.f90 b/flang/test/Lower/OpenMP/taskwait.f90
new file mode 100644
index 0000000..30e6422
--- /dev/null
+++ b/flang/test/Lower/OpenMP/taskwait.f90
@@ -0,0 +1,12 @@
+!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s --check-prefixes="FIRDialect,OMPDialect"
+!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | fir-opt --fir-to-llvm-ir | FileCheck %s --check-prefixes="OMPDialect"
+
+!FIRDialect-LABEL: @_QPomp_taskwait
+subroutine omp_taskwait
+  !OMPDialect: omp.taskwait
+  !$omp taskwait
+  !FIRDialect: fir.call @_QPfoo() : () -> ()
+  call foo()
+  !OMPDialect: omp.taskwait
+  !$omp taskwait
+end subroutine omp_taskwait
diff --git a/flang/test/Lower/OpenMP/taskyield.f90 b/flang/test/Lower/OpenMP/taskyield.f90
new file mode 100644
index 0000000..07d0c12
--- /dev/null
+++ b/flang/test/Lower/OpenMP/taskyield.f90
@@ -0,0 +1,12 @@
+!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s --check-prefixes="FIRDialect,OMPDialect"
+!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | fir-opt --fir-to-llvm-ir | FileCheck %s --check-prefixes="OMPDialect"
+
+!FIRDialect-LABEL: @_QPomp_taskyield
+subroutine omp_taskyield
+  !OMPDialect: omp.taskyield
+  !$omp taskyield
+  !FIRDialect: fir.call @_QPfoo() : () -> ()
+  call foo()
+  !OMPDialect: omp.taskyield
+  !$omp taskyield
+end subroutine omp_taskyield
diff --git a/flang/test/Semantics/altreturn02.f90 b/flang/test/Semantics/altreturn02.f90
index a142871..f17d173 100644
--- a/flang/test/Semantics/altreturn02.f90
+++ b/flang/test/Semantics/altreturn02.f90
@@ -1,4 +1,4 @@
-! RUN: %python %S/test_errors.py %s %flang_fc1
+! RUN: %flang_fc1 -fsyntax-only -pedantic %s  2>&1 | FileCheck %s --allow-empty
 ! Check subroutine with alt return
 
        SUBROUTINE TEST (N, *, *)
@@ -6,3 +6,5 @@
        IF ( N .EQ. 1 ) RETURN 1
        RETURN 2
        END
+! CHECK-NOT: error:
+! CHECK-NOT: portability:
diff --git a/flang/test/Semantics/altreturn07.f90 b/flang/test/Semantics/altreturn07.f90
new file mode 100644
index 0000000..b0cb60a
--- /dev/null
+++ b/flang/test/Semantics/altreturn07.f90
@@ -0,0 +1,6 @@
+! RUN: %flang_fc1 -fsyntax-only -pedantic %s  2>&1 | FileCheck %s
+! Test extension: RETURN from main program
+
+return
+! CHECK: portability: RETURN should not appear in a main program
+end
diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h
index a504b06..7893cb0b 100644
--- a/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -4447,7 +4447,11 @@ Node *AbstractManglingParser<Derived, Alloc>::parseFoldExpr() {
   ++First;
 
   const auto *Op = parseOperatorEncoding();
-  if (!Op || Op->getKind() != OperatorInfo::Binary)
+  if (!Op)
+    return nullptr;
+  if (!(Op->getKind() == OperatorInfo::Binary
+        || (Op->getKind() == OperatorInfo::Member
+            && Op->getName().back() == '*')))
     return nullptr;
 
   Node *Pack = getDerived().parseExpr();
diff --git a/libcxxabi/test/test_demangle.pass.cpp b/libcxxabi/test/test_demangle.pass.cpp
index 172f15f..b0295ce0 100644
--- a/libcxxabi/test/test_demangle.pass.cpp
+++ b/libcxxabi/test/test_demangle.pass.cpp
@@ -29963,6 +29963,8 @@ const char* cases[][2] =
      "void Partial<1, 2>::foldr<3, 4>(A<1 + (2 + ((3, 4) + ... + (1 + (2 + ((3, 4) + ...)))))>)"},
     {"_ZN7PartialIJLi1ELi2EEE5foldrIJLi3ELi4EEEEv1AIXplplLi1ELi2EfRplT_plplLi1ELi2EflplT_EE",
      "void Partial<1, 2>::foldr<3, 4>(A<1 + 2 + ((3, 4) + ... + (1 + 2 + (... + (3, 4))))>)"},
+    {"_Z1fIXfLpm1x1yEEvv", "void f<(x ->* ... ->* (y...))>()"},
+    {"_Z1fIXfLds1x1yEEvv", "void f<(x .* ... .* (y...))>()"},
 
     // reference collapsing:
     {"_Z1fIR1SEiOT_", "int f<S&>(S&)"},
@@ -30173,6 +30175,9 @@ const char* invalid_cases[] =
     "_ZN1fIiEEvNTUt_E",
     "_ZNDTUt_Ev",
 
+    "_Z1fIXfLpt1x1yEEvv",
+    "_Z1fIXfLdt1x1yEEvv",
+
     "_ZN1fIXawLi0EEEEvv",
 
     "_ZNWUt_3FOOEv",
diff --git a/libunwind/include/__libunwind_config.h b/libunwind/include/__libunwind_config.h
index 30f5e0a..e626567 100644
--- a/libunwind/include/__libunwind_config.h
+++ b/libunwind/include/__libunwind_config.h
@@ -29,6 +29,7 @@
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_HEXAGON   34
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV     64
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_VE        143
+#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_S390X     83
 
 #if defined(_LIBUNWIND_IS_NATIVE_ONLY)
 # if defined(__linux__)
@@ -160,6 +161,11 @@
 #  define _LIBUNWIND_CONTEXT_SIZE 67
 #  define _LIBUNWIND_CURSOR_SIZE 79
 #  define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_VE
+# elif defined(__s390x__)
+#  define _LIBUNWIND_TARGET_S390X 1
+#  define _LIBUNWIND_CONTEXT_SIZE 34
+#  define _LIBUNWIND_CURSOR_SIZE 46
+#  define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_S390X
 # else
 #  error "Unsupported architecture."
 # endif
@@ -178,6 +184,7 @@
 # define _LIBUNWIND_TARGET_HEXAGON 1
 # define _LIBUNWIND_TARGET_RISCV 1
 # define _LIBUNWIND_TARGET_VE 1
+# define _LIBUNWIND_TARGET_S390X 1
 # define _LIBUNWIND_CONTEXT_SIZE 167
 # define _LIBUNWIND_CURSOR_SIZE 179
 # define _LIBUNWIND_HIGHEST_DWARF_REGISTER 287
diff --git a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h
index a69e72f..3d8fd21 100644
--- a/libunwind/include/libunwind.h
+++ b/libunwind/include/libunwind.h
@@ -1177,4 +1177,46 @@ enum {
   UNW_VE_VL   = 145,
 };
 
+// s390x register numbers
+enum {
+  UNW_S390X_R0      = 0,
+  UNW_S390X_R1      = 1,
+  UNW_S390X_R2      = 2,
+  UNW_S390X_R3      = 3,
+  UNW_S390X_R4      = 4,
+  UNW_S390X_R5      = 5,
+  UNW_S390X_R6      = 6,
+  UNW_S390X_R7      = 7,
+  UNW_S390X_R8      = 8,
+  UNW_S390X_R9      = 9,
+  UNW_S390X_R10     = 10,
+  UNW_S390X_R11     = 11,
+  UNW_S390X_R12     = 12,
+  UNW_S390X_R13     = 13,
+  UNW_S390X_R14     = 14,
+  UNW_S390X_R15     = 15,
+  UNW_S390X_F0      = 16,
+  UNW_S390X_F2      = 17,
+  UNW_S390X_F4      = 18,
+  UNW_S390X_F6      = 19,
+  UNW_S390X_F1      = 20,
+  UNW_S390X_F3      = 21,
+  UNW_S390X_F5      = 22,
+  UNW_S390X_F7      = 23,
+  UNW_S390X_F8      = 24,
+  UNW_S390X_F10     = 25,
+  UNW_S390X_F12     = 26,
+  UNW_S390X_F14     = 27,
+  UNW_S390X_F9      = 28,
+  UNW_S390X_F11     = 29,
+  UNW_S390X_F13     = 30,
+  UNW_S390X_F15     = 31,
+  // 32-47 Control Registers
+  // 48-63 Access Registers
+  UNW_S390X_PSWM    = 64,
+  UNW_S390X_PSWA    = 65,
+  // 66-67 Reserved
+  // 68-83 Vector Registers %v16-%v31
+};
+
 #endif
diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp
index 32c13e6..28c617f3 100644
--- a/libunwind/src/Registers.hpp
+++ b/libunwind/src/Registers.hpp
@@ -39,6 +39,7 @@ enum {
   REGISTERS_HEXAGON,
   REGISTERS_RISCV,
   REGISTERS_VE,
+  REGISTERS_S390X,
 };
 
 #if defined(_LIBUNWIND_TARGET_I386)
@@ -4716,6 +4717,293 @@ inline const char *Registers_ve::getRegisterName(int regNum) {
 }
 #endif // _LIBUNWIND_TARGET_VE
 
+#if defined(_LIBUNWIND_TARGET_S390X)
+/// Registers_s390x holds the register state of a thread in a
+/// 64-bit Linux on IBM zSystems process.
+class _LIBUNWIND_HIDDEN Registers_s390x {
+public:
+  Registers_s390x();
+  Registers_s390x(const void *registers);
+
+  bool        validRegister(int num) const;
+  uint64_t    getRegister(int num) const;
+  void        setRegister(int num, uint64_t value);
+  bool        validFloatRegister(int num) const;
+  double      getFloatRegister(int num) const;
+  void        setFloatRegister(int num, double value);
+  bool        validVectorRegister(int num) const;
+  v128        getVectorRegister(int num) const;
+  void        setVectorRegister(int num, v128 value);
+  static const char *getRegisterName(int num);
+  void        jumpto();
+  static int  lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_S390X; }
+  static int  getArch() { return REGISTERS_S390X; }
+
+  uint64_t  getSP() const         { return _registers.__gpr[15]; }
+  void      setSP(uint64_t value) { _registers.__gpr[15] = value; }
+  uint64_t  getIP() const         { return _registers.__pswa; }
+  void      setIP(uint64_t value) { _registers.__pswa = value; }
+
+private:
+  struct s390x_thread_state_t {
+    uint64_t __pswm;    // Problem Status Word: Mask
+    uint64_t __pswa;    // Problem Status Word: Address (PC)
+    uint64_t __gpr[16]; // General Purpose Registers
+    double __fpr[16];   // Floating-Point Registers
+  };
+
+  s390x_thread_state_t _registers;
+};
+
+inline Registers_s390x::Registers_s390x(const void *registers) {
+  static_assert((check_fit<Registers_s390x, unw_context_t>::does_fit),
+                "s390x registers do not fit into unw_context_t");
+  memcpy(&_registers, static_cast<const uint8_t *>(registers),
+         sizeof(_registers));
+}
+
+inline Registers_s390x::Registers_s390x() {
+  memset(&_registers, 0, sizeof(_registers));
+}
+
+inline bool Registers_s390x::validRegister(int regNum) const {
+  switch (regNum) {
+  case UNW_S390X_PSWM:
+  case UNW_S390X_PSWA:
+  case UNW_REG_IP:
+  case UNW_REG_SP:
+      return true;
+  }
+
+  if (regNum >= UNW_S390X_R0 && regNum <= UNW_S390X_R15)
+    return true;
+
+  return false;
+}
+
+inline uint64_t Registers_s390x::getRegister(int regNum) const {
+  if (regNum >= UNW_S390X_R0 && regNum <= UNW_S390X_R15)
+    return _registers.__gpr[regNum - UNW_S390X_R0];
+
+  switch (regNum) {
+  case UNW_S390X_PSWM:
+    return _registers.__pswm;
+  case UNW_S390X_PSWA:
+  case UNW_REG_IP:
+    return _registers.__pswa;
+  case UNW_REG_SP:
+    return _registers.__gpr[15];
+  }
+  _LIBUNWIND_ABORT("unsupported s390x register");
+}
+
+inline void Registers_s390x::setRegister(int regNum, uint64_t value) {
+  if (regNum >= UNW_S390X_R0 && regNum <= UNW_S390X_R15) {
+    _registers.__gpr[regNum - UNW_S390X_R0] = value;
+    return;
+  }
+
+  switch (regNum) {
+  case UNW_S390X_PSWM:
+    _registers.__pswm = value;
+    return;
+  case UNW_S390X_PSWA:
+  case UNW_REG_IP:
+    _registers.__pswa = value;
+    return;
+  case UNW_REG_SP:
+    _registers.__gpr[15] = value;
+    return;
+  }
+  _LIBUNWIND_ABORT("unsupported s390x register");
+}
+
+inline bool Registers_s390x::validFloatRegister(int regNum) const {
+  return regNum >= UNW_S390X_F0 && regNum <= UNW_S390X_F15;
+}
+
+inline double Registers_s390x::getFloatRegister(int regNum) const {
+  // NOTE: FPR DWARF register numbers are not consecutive.
+  switch (regNum) {
+  case UNW_S390X_F0:
+    return _registers.__fpr[0];
+  case UNW_S390X_F1:
+    return _registers.__fpr[1];
+  case UNW_S390X_F2:
+    return _registers.__fpr[2];
+  case UNW_S390X_F3:
+    return _registers.__fpr[3];
+  case UNW_S390X_F4:
+    return _registers.__fpr[4];
+  case UNW_S390X_F5:
+    return _registers.__fpr[5];
+  case UNW_S390X_F6:
+    return _registers.__fpr[6];
+  case UNW_S390X_F7:
+    return _registers.__fpr[7];
+  case UNW_S390X_F8:
+    return _registers.__fpr[8];
+  case UNW_S390X_F9:
+    return _registers.__fpr[9];
+  case UNW_S390X_F10:
+    return _registers.__fpr[10];
+  case UNW_S390X_F11:
+    return _registers.__fpr[11];
+  case UNW_S390X_F12:
+    return _registers.__fpr[12];
+  case UNW_S390X_F13:
+    return _registers.__fpr[13];
+  case UNW_S390X_F14:
+    return _registers.__fpr[14];
+  case UNW_S390X_F15:
+    return _registers.__fpr[15];
+  }
+  _LIBUNWIND_ABORT("unsupported s390x register");
+}
+
+inline void Registers_s390x::setFloatRegister(int regNum, double value) {
+  // NOTE: FPR DWARF register numbers are not consecutive.
+  switch (regNum) {
+  case UNW_S390X_F0:
+    _registers.__fpr[0] = value;
+    return;
+  case UNW_S390X_F1:
+    _registers.__fpr[1] = value;
+    return;
+  case UNW_S390X_F2:
+    _registers.__fpr[2] = value;
+    return;
+  case UNW_S390X_F3:
+    _registers.__fpr[3] = value;
+    return;
+  case UNW_S390X_F4:
+    _registers.__fpr[4] = value;
+    return;
+  case UNW_S390X_F5:
+    _registers.__fpr[5] = value;
+    return;
+  case UNW_S390X_F6:
+    _registers.__fpr[6] = value;
+    return;
+  case UNW_S390X_F7:
+    _registers.__fpr[7] = value;
+    return;
+  case UNW_S390X_F8:
+    _registers.__fpr[8] = value;
+    return;
+  case UNW_S390X_F9:
+    _registers.__fpr[9] = value;
+    return;
+  case UNW_S390X_F10:
+    _registers.__fpr[10] = value;
+    return;
+  case UNW_S390X_F11:
+    _registers.__fpr[11] = value;
+    return;
+  case UNW_S390X_F12:
+    _registers.__fpr[12] = value;
+    return;
+  case UNW_S390X_F13:
+    _registers.__fpr[13] = value;
+    return;
+  case UNW_S390X_F14:
+    _registers.__fpr[14] = value;
+    return;
+  case UNW_S390X_F15:
+    _registers.__fpr[15] = value;
+    return;
+  }
+  _LIBUNWIND_ABORT("unsupported s390x register");
+}
+
+inline bool Registers_s390x::validVectorRegister(int /*regNum*/) const {
+  return false;
+}
+
+inline v128 Registers_s390x::getVectorRegister(int /*regNum*/) const {
+  _LIBUNWIND_ABORT("s390x vector support not implemented");
+}
+
+inline void Registers_s390x::setVectorRegister(int /*regNum*/, v128 /*value*/) {
+  _LIBUNWIND_ABORT("s390x vector support not implemented");
+}
+
+inline const char *Registers_s390x::getRegisterName(int regNum) {
+  switch (regNum) {
+  case UNW_REG_IP:
+    return "ip";
+  case UNW_REG_SP:
+    return "sp";
+  case UNW_S390X_R0:
+    return "r0";
+  case UNW_S390X_R1:
+    return "r1";
+  case UNW_S390X_R2:
+    return "r2";
+  case UNW_S390X_R3:
+    return "r3";
+  case UNW_S390X_R4:
+    return "r4";
+  case UNW_S390X_R5:
+    return "r5";
+  case UNW_S390X_R6:
+    return "r6";
+  case UNW_S390X_R7:
+    return "r7";
+  case UNW_S390X_R8:
+    return "r8";
+  case UNW_S390X_R9:
+    return "r9";
+  case UNW_S390X_R10:
+    return "r10";
+  case UNW_S390X_R11:
+    return "r11";
+  case UNW_S390X_R12:
+    return "r12";
+  case UNW_S390X_R13:
+    return "r13";
+  case UNW_S390X_R14:
+    return "r14";
+  case UNW_S390X_R15:
+    return "r15";
+  case UNW_S390X_F0:
+    return "f0";
+  case UNW_S390X_F1:
+    return "f1";
+  case UNW_S390X_F2:
+    return "f2";
+  case UNW_S390X_F3:
+    return "f3";
+  case UNW_S390X_F4:
+    return "f4";
+  case UNW_S390X_F5:
+    return "f5";
+  case UNW_S390X_F6:
+    return "f6";
+  case UNW_S390X_F7:
+    return "f7";
+  case UNW_S390X_F8:
+    return "f8";
+  case UNW_S390X_F9:
+    return "f9";
+  case UNW_S390X_F10:
+    return "f10";
+  case UNW_S390X_F11:
+    return "f11";
+  case UNW_S390X_F12:
+    return "f12";
+  case UNW_S390X_F13:
+    return "f13";
+  case UNW_S390X_F14:
+    return "f14";
+  case UNW_S390X_F15:
+    return "f15";
+  }
+  return "unknown register";
+}
+#endif // _LIBUNWIND_TARGET_S390X
+
+
 } // namespace libunwind
 
 #endif // __REGISTERS_HPP__
diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 29ded5c..655e41d 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -1220,6 +1220,12 @@ private:
   }
 #endif
 
+#if defined (_LIBUNWIND_TARGET_S390X)
+  compact_unwind_encoding_t dwarfEncoding(Registers_s390x &) const {
+    return 0;
+  }
+#endif
+
 #endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
 
 #if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND)
diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S
index b4843c9..eeb6453 100644
--- a/libunwind/src/UnwindRegistersRestore.S
+++ b/libunwind/src/UnwindRegistersRestore.S
@@ -1252,6 +1252,43 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_riscv6jumptoEv)
 
   ret                       // jump to ra
 
+#elif defined(__s390x__)
+
+DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_s390x6jumptoEv)
+//
+// void libunwind::Registers_s390x::jumpto()
+//
+// On entry:
+//  thread_state pointer is in r2
+//
+
+  // Skip PSWM, but load PSWA into r1
+  lg %r1, 8(%r2)
+
+  // Restore FPRs
+  ld %f0, 144(%r2)
+  ld %f1, 152(%r2)
+  ld %f2, 160(%r2)
+  ld %f3, 168(%r2)
+  ld %f4, 176(%r2)
+  ld %f5, 184(%r2)
+  ld %f6, 192(%r2)
+  ld %f7, 200(%r2)
+  ld %f8, 208(%r2)
+  ld %f9, 216(%r2)
+  ld %f10, 224(%r2)
+  ld %f11, 232(%r2)
+  ld %f12, 240(%r2)
+  ld %f13, 248(%r2)
+  ld %f14, 256(%r2)
+  ld %f15, 264(%r2)
+
+  // Restore GPRs - skipping %r0 and %r1
+  lmg  %r2, %r15, 32(%r2)
+
+  // Return to PSWA (was loaded into %r1 above)
+  br %r1
+
 #endif
 
 #endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */
diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S
index 0cfc7b5..f57dd63 100644
--- a/libunwind/src/UnwindRegistersSave.S
+++ b/libunwind/src/UnwindRegistersSave.S
@@ -1179,6 +1179,49 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
 
   li     a0, 0  // return UNW_ESUCCESS
   ret           // jump to ra
+
+#elif defined(__s390x__)
+
+//
+// extern int __unw_getcontext(unw_context_t* thread_state)
+//
+// On entry:
+//  thread_state pointer is in r2
+//
+DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
+
+  // Save GPRs
+  stmg %r0, %r15, 16(%r2)
+
+  // Save PSWM
+  epsw %r0, %r1
+  stm %r0, %r1, 0(%r2)
+
+  // Store return address as PSWA
+  stg %r14, 8(%r2)
+
+  // Save FPRs
+  std %f0, 144(%r2)
+  std %f1, 152(%r2)
+  std %f2, 160(%r2)
+  std %f3, 168(%r2)
+  std %f4, 176(%r2)
+  std %f5, 184(%r2)
+  std %f6, 192(%r2)
+  std %f7, 200(%r2)
+  std %f8, 208(%r2)
+  std %f9, 216(%r2)
+  std %f10, 224(%r2)
+  std %f11, 232(%r2)
+  std %f12, 240(%r2)
+  std %f13, 248(%r2)
+  std %f14, 256(%r2)
+  std %f15, 264(%r2)
+
+  // Return UNW_ESUCCESS
+  lghi %r2, 0
+  br %r14
+
 #endif
 
   WEAK_ALIAS(__unw_getcontext, unw_getcontext)
diff --git a/libunwind/src/config.h b/libunwind/src/config.h
index e751860..cc41b81 100644
--- a/libunwind/src/config.h
+++ b/libunwind/src/config.h
@@ -115,7 +115,7 @@
 #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) ||        \
     (!defined(__APPLE__) && defined(__arm__)) || defined(__aarch64__) ||       \
     defined(__mips__) || defined(__riscv) || defined(__hexagon__) ||           \
-    defined(__sparc__)
+    defined(__sparc__) || defined(__s390x__)
 #if !defined(_LIBUNWIND_BUILD_SJLJ_APIS)
 #define _LIBUNWIND_BUILD_ZERO_COST_APIS
 #endif
diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp
index 21aa426..b8b41ff 100644
--- a/libunwind/src/libunwind.cpp
+++ b/libunwind/src/libunwind.cpp
@@ -75,6 +75,8 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor,
 # define REGISTER_KIND Registers_riscv
 #elif defined(__ve__)
 # define REGISTER_KIND Registers_ve
+#elif defined(__s390x__)
+# define REGISTER_KIND Registers_s390x
 #else
 # error Architecture not supported
 #endif
diff --git a/lldb/docs/lldb-gdb-remote.txt b/lldb/docs/lldb-gdb-remote.txt
index 2ddc318..545fbb4 100644
--- a/lldb/docs/lldb-gdb-remote.txt
+++ b/lldb/docs/lldb-gdb-remote.txt
@@ -467,7 +467,7 @@ read packet: OK/E<error code>;AAAAAAAAA
 //
 //  Binary data kinds:
 //    - threadTraceBuffer: trace buffer for a thread.
-//    - cpuInfo: contents of the /proc/cpuinfo file.
+//    - procfsCpuInfo: contents of the /proc/cpuinfo file.
 //
 //  Counter info kinds:
 //    tsc-perf-zero-conversion:
@@ -522,7 +522,7 @@ read packet: {...object}/E<error code>;AAAAAAAAA
 //
 //  Binary data kinds:
 //    - threadTraceBuffer: trace buffer for a thread.
-//    - cpuInfo: contents of the /proc/cpuinfo file.
+//    - procfsCpuInfo: contents of the /proc/cpuinfo file.
 //----------------------------------------------------------------------
 
 send packet: jLLDBTraceGetBinaryData:{"type":<type>,"kind":<query>,"tid":<tid>,"offset":<offset>,"size":<size>}]
diff --git a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h
index 8960949..294b020 100644
--- a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h
+++ b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h
@@ -18,6 +18,12 @@
 /// See docs/lldb-gdb-remote.txt for more information.
 namespace lldb_private {
 
+// List of data kinds used by jLLDBGetState and jLLDBGetBinaryData.
+struct IntelPTDataKinds {
+  static const char *kProcFsCpuInfo;
+  static const char *kThreadTraceBuffer;
+};
+
 /// jLLDBTraceStart gdb-remote packet
 /// \{
 struct TraceIntelPTStartRequest : TraceStartRequest {
diff --git a/lldb/source/Interpreter/Options.cpp b/lldb/source/Interpreter/Options.cpp
index feebe33..7d1f766 100644
--- a/lldb/source/Interpreter/Options.cpp
+++ b/lldb/source/Interpreter/Options.cpp
@@ -20,6 +20,7 @@
 #include "lldb/Interpreter/CommandReturnObject.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/StreamString.h"
+#include "llvm/ADT/STLExtras.h"
 
 using namespace lldb;
 using namespace lldb_private;
@@ -420,8 +421,6 @@ void Options::GenerateOptionUsage(Stream &strm, CommandObject *cmd,
 
   uint32_t num_option_sets = GetRequiredOptions().size();
 
-  uint32_t i;
-
   if (!only_print_args) {
     for (uint32_t opt_set = 0; opt_set < num_option_sets; ++opt_set) {
       uint32_t opt_set_mask;
@@ -440,77 +439,46 @@ void Options::GenerateOptionUsage(Stream &strm, CommandObject *cmd,
       // single string. If a command has "-a" "-b" and "-c", this will show up
       // as [-abc]
 
-      std::set<int> options;
-      std::set<int>::const_iterator options_pos, options_end;
-      for (auto &def : opt_defs) {
-        if (def.usage_mask & opt_set_mask && def.HasShortOption()) {
-          // Add current option to the end of out_stream.
+      // We use a set here so that they will be sorted.
+      std::set<int> required_options;
+      std::set<int> optional_options;
 
-          if (def.required && def.option_has_arg == OptionParser::eNoArgument) {
-            options.insert(def.short_option);
+      for (auto &def : opt_defs) {
+        if (def.usage_mask & opt_set_mask && def.HasShortOption() &&
+            def.option_has_arg == OptionParser::eNoArgument) {
+          if (def.required) {
+            required_options.insert(def.short_option);
+          } else {
+            optional_options.insert(def.short_option);
           }
         }
       }
 
-      if (!options.empty()) {
-        // We have some required options with no arguments
+      if (!required_options.empty()) {
         strm.PutCString(" -");
-        for (i = 0; i < 2; ++i)
-          for (options_pos = options.begin(), options_end = options.end();
-               options_pos != options_end; ++options_pos) {
-            if (i == 0 && ::islower(*options_pos))
-              continue;
-            if (i == 1 && ::isupper(*options_pos))
-              continue;
-            strm << (char)*options_pos;
-          }
+        for (int short_option : required_options)
+          strm.PutChar(short_option);
       }
 
-      options.clear();
-      for (auto &def : opt_defs) {
-        if (def.usage_mask & opt_set_mask && def.HasShortOption()) {
-          // Add current option to the end of out_stream.
-
-          if (!def.required &&
-              def.option_has_arg == OptionParser::eNoArgument) {
-            options.insert(def.short_option);
-          }
-        }
-      }
-
-      if (!options.empty()) {
-        // We have some required options with no arguments
+      if (!optional_options.empty()) {
         strm.PutCString(" [-");
-        for (i = 0; i < 2; ++i)
-          for (options_pos = options.begin(), options_end = options.end();
-               options_pos != options_end; ++options_pos) {
-            if (i == 0 && ::islower(*options_pos))
-              continue;
-            if (i == 1 && ::isupper(*options_pos))
-              continue;
-            strm << (char)*options_pos;
-          }
+        for (int short_option : optional_options)
+          strm.PutChar(short_option);
         strm.PutChar(']');
       }
 
       // First go through and print the required options (list them up front).
-
       for (auto &def : opt_defs) {
-        if (def.usage_mask & opt_set_mask && def.HasShortOption()) {
-          if (def.required && def.option_has_arg != OptionParser::eNoArgument)
-            PrintOption(def, eDisplayBestOption, " ", nullptr, true, strm);
-        }
+        if (def.usage_mask & opt_set_mask && def.HasShortOption() &&
+            def.required && def.option_has_arg != OptionParser::eNoArgument)
+          PrintOption(def, eDisplayBestOption, " ", nullptr, true, strm);
       }
 
       // Now go through again, and this time only print the optional options.
-
       for (auto &def : opt_defs) {
-        if (def.usage_mask & opt_set_mask) {
-          // Add current option to the end of out_stream.
-
-          if (!def.required && def.option_has_arg != OptionParser::eNoArgument)
-            PrintOption(def, eDisplayBestOption, " ", nullptr, true, strm);
-        }
+        if (def.usage_mask & opt_set_mask && !def.required &&
+            def.option_has_arg != OptionParser::eNoArgument)
+          PrintOption(def, eDisplayBestOption, " ", nullptr, true, strm);
       }
 
       if (args_str.GetSize() > 0) {
@@ -540,63 +508,49 @@ void Options::GenerateOptionUsage(Stream &strm, CommandObject *cmd,
     //   -short <argument> ( --long_name <argument> )
     //   help text
 
-    // This variable is used to keep track of which options' info we've printed
-    // out, because some options can be in more than one usage level, but we
-    // only want to print the long form of its information once.
-
-    std::multimap<int, uint32_t> options_seen;
     strm.IndentMore(5);
 
-    // Put the unique command options in a vector & sort it, so we can output
-    // them alphabetically (by short_option) when writing out detailed help for
-    // each option.
-
-    i = 0;
-    for (auto &def : opt_defs)
-      options_seen.insert(std::make_pair(def.short_option, i++));
+    // Put the command options in a sorted container, so we can output
+    // them alphabetically by short_option.
+    std::multimap<int, uint32_t> options_ordered;
+    for (auto def : llvm::enumerate(opt_defs))
+      options_ordered.insert(
+          std::make_pair(def.value().short_option, def.index()));
 
-    // Go through the unique'd and alphabetically sorted vector of options,
-    // find the table entry for each option and write out the detailed help
-    // information for that option.
+    // Go through each option, find the table entry and write out the detailed
+    // help information for that option.
 
     bool first_option_printed = false;
 
-    for (auto pos : options_seen) {
-      i = pos.second;
-      // Print out the help information for this option.
-
+    for (auto pos : options_ordered) {
       // Put a newline separation between arguments
       if (first_option_printed)
         strm.EOL();
       else
         first_option_printed = true;
 
-      CommandArgumentType arg_type = opt_defs[i].argument_type;
-
-      StreamString arg_name_str;
-      arg_name_str.Printf("<%s>", CommandObject::GetArgumentName(arg_type));
+      OptionDefinition opt_def = opt_defs[pos.second];
 
       strm.Indent();
-      if (opt_defs[i].short_option && opt_defs[i].HasShortOption()) {
-        PrintOption(opt_defs[i], eDisplayShortOption, nullptr, nullptr, false,
+      if (opt_def.short_option && opt_def.HasShortOption()) {
+        PrintOption(opt_def, eDisplayShortOption, nullptr, nullptr, false,
                     strm);
-        PrintOption(opt_defs[i], eDisplayLongOption, " ( ", " )", false, strm);
+        PrintOption(opt_def, eDisplayLongOption, " ( ", " )", false, strm);
       } else {
         // Short option is not printable, just print long option
-        PrintOption(opt_defs[i], eDisplayLongOption, nullptr, nullptr, false,
-                    strm);
+        PrintOption(opt_def, eDisplayLongOption, nullptr, nullptr, false, strm);
       }
       strm.EOL();
 
       strm.IndentMore(5);
 
-      if (opt_defs[i].usage_text)
-        OutputFormattedUsageText(strm, opt_defs[i], screen_width);
-      if (!opt_defs[i].enum_values.empty()) {
+      if (opt_def.usage_text)
+        OutputFormattedUsageText(strm, opt_def, screen_width);
+      if (!opt_def.enum_values.empty()) {
         strm.Indent();
         strm.Printf("Values: ");
         bool is_first = true;
-        for (const auto &enum_value : opt_defs[i].enum_values) {
+        for (const auto &enum_value : opt_def.enum_values) {
           if (is_first) {
             strm.Printf("%s", enum_value.string_value);
             is_first = false;
diff --git a/lldb/source/Plugins/Platform/MacOSX/CMakeLists.txt b/lldb/source/Plugins/Platform/MacOSX/CMakeLists.txt
index bd93437..3b43598 100644
--- a/lldb/source/Plugins/Platform/MacOSX/CMakeLists.txt
+++ b/lldb/source/Plugins/Platform/MacOSX/CMakeLists.txt
@@ -8,6 +8,7 @@ lldb_tablegen(PlatformMacOSXPropertiesEnum.inc -gen-lldb-property-enum-defs
 
 list(APPEND PLUGIN_PLATFORM_MACOSX_SOURCES
   PlatformDarwin.cpp
+  PlatformDarwinDevice.cpp
   PlatformDarwinKernel.cpp
   PlatformMacOSX.cpp
   PlatformRemoteAppleBridge.cpp
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
index d169676..cac9c67 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
@@ -200,167 +200,6 @@ Status PlatformDarwin::ResolveSymbolFile(Target &target,
   return {};
 }
 
-static lldb_private::Status
-MakeCacheFolderForFile(const FileSpec &module_cache_spec) {
-  FileSpec module_cache_folder =
-      module_cache_spec.CopyByRemovingLastPathComponent();
-  return llvm::sys::fs::create_directory(module_cache_folder.GetPath());
-}
-
-static lldb_private::Status
-BringInRemoteFile(Platform *platform,
-                  const lldb_private::ModuleSpec &module_spec,
-                  const FileSpec &module_cache_spec) {
-  MakeCacheFolderForFile(module_cache_spec);
-  Status err = platform->GetFile(module_spec.GetFileSpec(), module_cache_spec);
-  return err;
-}
-
-lldb_private::Status PlatformDarwin::GetSharedModuleWithLocalCache(
-    const lldb_private::ModuleSpec &module_spec, lldb::ModuleSP &module_sp,
-    const lldb_private::FileSpecList *module_search_paths_ptr,
-    llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules, bool *did_create_ptr) {
-
-  Log *log = GetLog(LLDBLog::Platform);
-  LLDB_LOGF(log,
-            "[%s] Trying to find module %s/%s - platform path %s/%s symbol "
-            "path %s/%s",
-            (IsHost() ? "host" : "remote"),
-            module_spec.GetFileSpec().GetDirectory().AsCString(),
-            module_spec.GetFileSpec().GetFilename().AsCString(),
-            module_spec.GetPlatformFileSpec().GetDirectory().AsCString(),
-            module_spec.GetPlatformFileSpec().GetFilename().AsCString(),
-            module_spec.GetSymbolFileSpec().GetDirectory().AsCString(),
-            module_spec.GetSymbolFileSpec().GetFilename().AsCString());
-
-  Status err;
-
-  if (CheckLocalSharedCache()) {
-    // When debugging on the host, we are most likely using the same shared
-    // cache as our inferior. The dylibs from the shared cache might not
-    // exist on the filesystem, so let's use the images in our own memory
-    // to create the modules.
-
-    // Check if the requested image is in our shared cache.
-    SharedCacheImageInfo image_info =
-        HostInfo::GetSharedCacheImageInfo(module_spec.GetFileSpec().GetPath());
-
-    // If we found it and it has the correct UUID, let's proceed with
-    // creating a module from the memory contents.
-    if (image_info.uuid &&
-        (!module_spec.GetUUID() || module_spec.GetUUID() == image_info.uuid)) {
-      ModuleSpec shared_cache_spec(module_spec.GetFileSpec(), image_info.uuid,
-                                   image_info.data_sp);
-      err = ModuleList::GetSharedModule(shared_cache_spec, module_sp,
-                                        module_search_paths_ptr, old_modules,
-                                        did_create_ptr);
-      if (module_sp)
-        return err;
-    }
-  }
-
-  err = ModuleList::GetSharedModule(module_spec, module_sp,
-                                    module_search_paths_ptr, old_modules,
-                                    did_create_ptr);
-  if (module_sp)
-    return err;
-
-  if (!IsHost()) {
-    std::string cache_path(GetLocalCacheDirectory());
-    // Only search for a locally cached file if we have a valid cache path
-    if (!cache_path.empty()) {
-      std::string module_path(module_spec.GetFileSpec().GetPath());
-      cache_path.append(module_path);
-      FileSpec module_cache_spec(cache_path);
-
-      // if rsync is supported, always bring in the file - rsync will be very
-      // efficient when files are the same on the local and remote end of the
-      // connection
-      if (this->GetSupportsRSync()) {
-        err = BringInRemoteFile(this, module_spec, module_cache_spec);
-        if (err.Fail())
-          return err;
-        if (FileSystem::Instance().Exists(module_cache_spec)) {
-          Log *log = GetLog(LLDBLog::Platform);
-          LLDB_LOGF(log, "[%s] module %s/%s was rsynced and is now there",
-                    (IsHost() ? "host" : "remote"),
-                    module_spec.GetFileSpec().GetDirectory().AsCString(),
-                    module_spec.GetFileSpec().GetFilename().AsCString());
-          ModuleSpec local_spec(module_cache_spec,
-                                module_spec.GetArchitecture());
-          module_sp = std::make_shared<Module>(local_spec);
-          module_sp->SetPlatformFileSpec(module_spec.GetFileSpec());
-          return Status();
-        }
-      }
-
-      // try to find the module in the cache
-      if (FileSystem::Instance().Exists(module_cache_spec)) {
-        // get the local and remote MD5 and compare
-        if (m_remote_platform_sp) {
-          // when going over the *slow* GDB remote transfer mechanism we first
-          // check the hashes of the files - and only do the actual transfer if
-          // they differ
-          uint64_t high_local, high_remote, low_local, low_remote;
-          auto MD5 = llvm::sys::fs::md5_contents(module_cache_spec.GetPath());
-          if (!MD5)
-            return Status(MD5.getError());
-          std::tie(high_local, low_local) = MD5->words();
-
-          m_remote_platform_sp->CalculateMD5(module_spec.GetFileSpec(),
-                                             low_remote, high_remote);
-          if (low_local != low_remote || high_local != high_remote) {
-            // bring in the remote file
-            Log *log = GetLog(LLDBLog::Platform);
-            LLDB_LOGF(log,
-                      "[%s] module %s/%s needs to be replaced from remote copy",
-                      (IsHost() ? "host" : "remote"),
-                      module_spec.GetFileSpec().GetDirectory().AsCString(),
-                      module_spec.GetFileSpec().GetFilename().AsCString());
-            Status err =
-                BringInRemoteFile(this, module_spec, module_cache_spec);
-            if (err.Fail())
-              return err;
-          }
-        }
-
-        ModuleSpec local_spec(module_cache_spec, module_spec.GetArchitecture());
-        module_sp = std::make_shared<Module>(local_spec);
-        module_sp->SetPlatformFileSpec(module_spec.GetFileSpec());
-        Log *log = GetLog(LLDBLog::Platform);
-        LLDB_LOGF(log, "[%s] module %s/%s was found in the cache",
-                  (IsHost() ? "host" : "remote"),
-                  module_spec.GetFileSpec().GetDirectory().AsCString(),
-                  module_spec.GetFileSpec().GetFilename().AsCString());
-        return Status();
-      }
-
-      // bring in the remote module file
-      LLDB_LOGF(log, "[%s] module %s/%s needs to come in remotely",
-                (IsHost() ? "host" : "remote"),
-                module_spec.GetFileSpec().GetDirectory().AsCString(),
-                module_spec.GetFileSpec().GetFilename().AsCString());
-      Status err = BringInRemoteFile(this, module_spec, module_cache_spec);
-      if (err.Fail())
-        return err;
-      if (FileSystem::Instance().Exists(module_cache_spec)) {
-        Log *log = GetLog(LLDBLog::Platform);
-        LLDB_LOGF(log, "[%s] module %s/%s is now cached and fine",
-                  (IsHost() ? "host" : "remote"),
-                  module_spec.GetFileSpec().GetDirectory().AsCString(),
-                  module_spec.GetFileSpec().GetFilename().AsCString());
-        ModuleSpec local_spec(module_cache_spec, module_spec.GetArchitecture());
-        module_sp = std::make_shared<Module>(local_spec);
-        module_sp->SetPlatformFileSpec(module_spec.GetFileSpec());
-        return Status();
-      } else
-        return Status("unable to obtain valid module file");
-    } else
-      return Status("no cache path");
-  } else
-    return Status("unable to resolve module");
-}
-
 Status PlatformDarwin::GetSharedModule(
     const ModuleSpec &module_spec, Process *process, ModuleSP &module_sp,
     const FileSpecList *module_search_paths_ptr,
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h
index edc8faa..b4c791e 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h
@@ -144,11 +144,6 @@ protected:
 
   void ReadLibdispatchOffsets(Process *process);
 
-  virtual Status GetSharedModuleWithLocalCache(
-      const ModuleSpec &module_spec, lldb::ModuleSP &module_sp,
-      const FileSpecList *module_search_paths_ptr,
-      llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules, bool *did_create_ptr);
-
   virtual bool CheckLocalSharedCache() const { return IsHost(); }
 
   struct SDKEnumeratorInfo {
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.cpp
new file mode 100644
index 0000000..7feaef9
--- /dev/null
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.cpp
@@ -0,0 +1,456 @@
+//===-- PlatformDarwinDevice.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PlatformDarwinDevice.h"
+#include "lldb/Core/Module.h"
+#include "lldb/Core/ModuleList.h"
+#include "lldb/Core/ModuleSpec.h"
+#include "lldb/Host/HostInfo.h"
+#include "lldb/Utility/FileSpec.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "lldb/Utility/Log.h"
+
+using namespace lldb;
+using namespace lldb_private;
+
+PlatformDarwinDevice::~PlatformDarwinDevice() = default;
+
+FileSystem::EnumerateDirectoryResult
+PlatformDarwinDevice::GetContainedFilesIntoVectorOfStringsCallback(
+    void *baton, llvm::sys::fs::file_type ft, llvm::StringRef path) {
+  ((PlatformDarwinDevice::SDKDirectoryInfoCollection *)baton)
+      ->push_back(PlatformDarwinDevice::SDKDirectoryInfo(FileSpec(path)));
+  return FileSystem::eEnumerateDirectoryResultNext;
+}
+
+bool PlatformDarwinDevice::UpdateSDKDirectoryInfosIfNeeded() {
+  Log *log = GetLog(LLDBLog::Host);
+  std::lock_guard<std::mutex> guard(m_sdk_dir_mutex);
+  if (m_sdk_directory_infos.empty()) {
+    // A --sysroot option was supplied - add it to our list of SDKs to check
+    if (m_sdk_sysroot) {
+      FileSpec sdk_sysroot_fspec(m_sdk_sysroot.GetCString());
+      FileSystem::Instance().Resolve(sdk_sysroot_fspec);
+      const SDKDirectoryInfo sdk_sysroot_directory_info(sdk_sysroot_fspec);
+      m_sdk_directory_infos.push_back(sdk_sysroot_directory_info);
+      if (log) {
+        LLDB_LOGF(log,
+                  "PlatformDarwinDevice::UpdateSDKDirectoryInfosIfNeeded added "
+                  "--sysroot SDK directory %s",
+                  m_sdk_sysroot.GetCString());
+      }
+      return true;
+    }
+    const char *device_support_dir = GetDeviceSupportDirectory();
+    if (log) {
+      LLDB_LOGF(log,
+                "PlatformDarwinDevice::UpdateSDKDirectoryInfosIfNeeded Got "
+                "DeviceSupport directory %s",
+                device_support_dir);
+    }
+    if (device_support_dir) {
+      const bool find_directories = true;
+      const bool find_files = false;
+      const bool find_other = false;
+
+      SDKDirectoryInfoCollection builtin_sdk_directory_infos;
+      FileSystem::Instance().EnumerateDirectory(
+          m_device_support_directory, find_directories, find_files, find_other,
+          GetContainedFilesIntoVectorOfStringsCallback,
+          &builtin_sdk_directory_infos);
+
+      // Only add SDK directories that have symbols in them, some SDKs only
+      // contain developer disk images and no symbols, so they aren't useful to
+      // us.
+      FileSpec sdk_symbols_symlink_fspec;
+      for (const auto &sdk_directory_info : builtin_sdk_directory_infos) {
+        sdk_symbols_symlink_fspec = sdk_directory_info.directory;
+        sdk_symbols_symlink_fspec.AppendPathComponent("Symbols");
+        if (FileSystem::Instance().Exists(sdk_symbols_symlink_fspec)) {
+          m_sdk_directory_infos.push_back(sdk_directory_info);
+          if (log) {
+            LLDB_LOGF(log,
+                      "PlatformDarwinDevice::UpdateSDKDirectoryInfosIfNeeded "
+                      "added builtin SDK directory %s",
+                      sdk_symbols_symlink_fspec.GetPath().c_str());
+          }
+        }
+      }
+
+      const uint32_t num_installed = m_sdk_directory_infos.size();
+      llvm::StringRef dirname = GetDeviceSupportDirectoryName();
+      std::string local_sdk_cache_str = "~/Library/Developer/Xcode/";
+      local_sdk_cache_str += std::string(dirname);
+      FileSpec local_sdk_cache(local_sdk_cache_str.c_str());
+      FileSystem::Instance().Resolve(local_sdk_cache);
+      if (FileSystem::Instance().Exists(local_sdk_cache)) {
+        if (log) {
+          LLDB_LOGF(log,
+                    "PlatformDarwinDevice::UpdateSDKDirectoryInfosIfNeeded "
+                    "searching %s for additional SDKs",
+                    local_sdk_cache.GetPath().c_str());
+        }
+        char path[PATH_MAX];
+        if (local_sdk_cache.GetPath(path, sizeof(path))) {
+          FileSystem::Instance().EnumerateDirectory(
+              path, find_directories, find_files, find_other,
+              GetContainedFilesIntoVectorOfStringsCallback,
+              &m_sdk_directory_infos);
+          const uint32_t num_sdk_infos = m_sdk_directory_infos.size();
+          // First try for an exact match of major, minor and update
+          for (uint32_t i = num_installed; i < num_sdk_infos; ++i) {
+            m_sdk_directory_infos[i].user_cached = true;
+            if (log) {
+              LLDB_LOGF(log,
+                        "PlatformDarwinDevice::"
+                        "UpdateSDKDirectoryInfosIfNeeded "
+                        "user SDK directory %s",
+                        m_sdk_directory_infos[i].directory.GetPath().c_str());
+            }
+          }
+        }
+      }
+
+      const char *addtional_platform_dirs = getenv("PLATFORM_SDK_DIRECTORY");
+      if (addtional_platform_dirs) {
+        SDKDirectoryInfoCollection env_var_sdk_directory_infos;
+        FileSystem::Instance().EnumerateDirectory(
+            addtional_platform_dirs, find_directories, find_files, find_other,
+            GetContainedFilesIntoVectorOfStringsCallback,
+            &env_var_sdk_directory_infos);
+        FileSpec sdk_symbols_symlink_fspec;
+        for (const auto &sdk_directory_info : env_var_sdk_directory_infos) {
+          sdk_symbols_symlink_fspec = sdk_directory_info.directory;
+          sdk_symbols_symlink_fspec.AppendPathComponent("Symbols");
+          if (FileSystem::Instance().Exists(sdk_symbols_symlink_fspec)) {
+            m_sdk_directory_infos.push_back(sdk_directory_info);
+            if (log) {
+              LLDB_LOGF(log,
+                        "PlatformDarwinDevice::UpdateSDKDirectoryInfosIfNeeded "
+                        "added env var SDK directory %s",
+                        sdk_symbols_symlink_fspec.GetPath().c_str());
+            }
+          }
+        }
+      }
+    }
+  }
+  return !m_sdk_directory_infos.empty();
+}
+
+const PlatformDarwinDevice::SDKDirectoryInfo *
+PlatformDarwinDevice::GetSDKDirectoryForCurrentOSVersion() {
+  uint32_t i;
+  if (UpdateSDKDirectoryInfosIfNeeded()) {
+    const uint32_t num_sdk_infos = m_sdk_directory_infos.size();
+
+    // Check to see if the user specified a build string. If they did, then be
+    // sure to match it.
+    std::vector<bool> check_sdk_info(num_sdk_infos, true);
+    ConstString build(m_sdk_build);
+    if (build) {
+      for (i = 0; i < num_sdk_infos; ++i)
+        check_sdk_info[i] = m_sdk_directory_infos[i].build == build;
+    }
+
+    // If we are connected we can find the version of the OS the platform us
+    // running on and select the right SDK
+    llvm::VersionTuple version = GetOSVersion();
+    if (!version.empty()) {
+      if (UpdateSDKDirectoryInfosIfNeeded()) {
+        // First try for an exact match of major, minor and update
+        for (i = 0; i < num_sdk_infos; ++i) {
+          if (check_sdk_info[i]) {
+            if (m_sdk_directory_infos[i].version == version)
+              return &m_sdk_directory_infos[i];
+          }
+        }
+        // First try for an exact match of major and minor
+        for (i = 0; i < num_sdk_infos; ++i) {
+          if (check_sdk_info[i]) {
+            if (m_sdk_directory_infos[i].version.getMajor() ==
+                    version.getMajor() &&
+                m_sdk_directory_infos[i].version.getMinor() ==
+                    version.getMinor()) {
+              return &m_sdk_directory_infos[i];
+            }
+          }
+        }
+        // Lastly try to match of major version only..
+        for (i = 0; i < num_sdk_infos; ++i) {
+          if (check_sdk_info[i]) {
+            if (m_sdk_directory_infos[i].version.getMajor() ==
+                version.getMajor()) {
+              return &m_sdk_directory_infos[i];
+            }
+          }
+        }
+      }
+    } else if (build) {
+      // No version, just a build number, search for the first one that matches
+      for (i = 0; i < num_sdk_infos; ++i)
+        if (check_sdk_info[i])
+          return &m_sdk_directory_infos[i];
+    }
+  }
+  return nullptr;
+}
+
+const PlatformDarwinDevice::SDKDirectoryInfo *
+PlatformDarwinDevice::GetSDKDirectoryForLatestOSVersion() {
+  const PlatformDarwinDevice::SDKDirectoryInfo *result = nullptr;
+  if (UpdateSDKDirectoryInfosIfNeeded()) {
+    auto max = std::max_element(
+        m_sdk_directory_infos.begin(), m_sdk_directory_infos.end(),
+        [](const SDKDirectoryInfo &a, const SDKDirectoryInfo &b) {
+          return a.version < b.version;
+        });
+    if (max != m_sdk_directory_infos.end())
+      result = &*max;
+  }
+  return result;
+}
+
+const char *PlatformDarwinDevice::GetDeviceSupportDirectory() {
+  std::string platform_dir =
+      ("/Platforms/" + GetPlatformName() + "/DeviceSupport").str();
+  if (m_device_support_directory.empty()) {
+    if (FileSpec fspec = HostInfo::GetXcodeDeveloperDirectory()) {
+      m_device_support_directory = fspec.GetPath();
+      m_device_support_directory.append(platform_dir.c_str());
+    } else {
+      // Assign a single NULL character so we know we tried to find the device
+      // support directory and we don't keep trying to find it over and over.
+      m_device_support_directory.assign(1, '\0');
+    }
+  }
+  // We should have put a single NULL character into m_device_support_directory
+  // or it should have a valid path if the code gets here
+  assert(m_device_support_directory.empty() == false);
+  if (m_device_support_directory[0])
+    return m_device_support_directory.c_str();
+  return nullptr;
+}
+
+const char *PlatformDarwinDevice::GetDeviceSupportDirectoryForOSVersion() {
+  if (m_sdk_sysroot)
+    return m_sdk_sysroot.GetCString();
+
+  if (m_device_support_directory_for_os_version.empty()) {
+    const PlatformDarwinDevice::SDKDirectoryInfo *sdk_dir_info =
+        GetSDKDirectoryForCurrentOSVersion();
+    if (sdk_dir_info == nullptr)
+      sdk_dir_info = GetSDKDirectoryForLatestOSVersion();
+    if (sdk_dir_info) {
+      char path[PATH_MAX];
+      if (sdk_dir_info->directory.GetPath(path, sizeof(path))) {
+        m_device_support_directory_for_os_version = path;
+        return m_device_support_directory_for_os_version.c_str();
+      }
+    } else {
+      // Assign a single NULL character so we know we tried to find the device
+      // support directory and we don't keep trying to find it over and over.
+      m_device_support_directory_for_os_version.assign(1, '\0');
+    }
+  }
+  // We should have put a single NULL character into
+  // m_device_support_directory_for_os_version or it should have a valid path
+  // if the code gets here
+  assert(m_device_support_directory_for_os_version.empty() == false);
+  if (m_device_support_directory_for_os_version[0])
+    return m_device_support_directory_for_os_version.c_str();
+  return nullptr;
+}
+
+static lldb_private::Status
+MakeCacheFolderForFile(const FileSpec &module_cache_spec) {
+  FileSpec module_cache_folder =
+      module_cache_spec.CopyByRemovingLastPathComponent();
+  return llvm::sys::fs::create_directory(module_cache_folder.GetPath());
+}
+
+static lldb_private::Status
+BringInRemoteFile(Platform *platform,
+                  const lldb_private::ModuleSpec &module_spec,
+                  const FileSpec &module_cache_spec) {
+  MakeCacheFolderForFile(module_cache_spec);
+  Status err = platform->GetFile(module_spec.GetFileSpec(), module_cache_spec);
+  return err;
+}
+
+lldb_private::Status PlatformDarwinDevice::GetSharedModuleWithLocalCache(
+    const lldb_private::ModuleSpec &module_spec, lldb::ModuleSP &module_sp,
+    const lldb_private::FileSpecList *module_search_paths_ptr,
+    llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules, bool *did_create_ptr) {
+
+  Log *log = GetLog(LLDBLog::Platform);
+  LLDB_LOGF(log,
+            "[%s] Trying to find module %s/%s - platform path %s/%s symbol "
+            "path %s/%s",
+            (IsHost() ? "host" : "remote"),
+            module_spec.GetFileSpec().GetDirectory().AsCString(),
+            module_spec.GetFileSpec().GetFilename().AsCString(),
+            module_spec.GetPlatformFileSpec().GetDirectory().AsCString(),
+            module_spec.GetPlatformFileSpec().GetFilename().AsCString(),
+            module_spec.GetSymbolFileSpec().GetDirectory().AsCString(),
+            module_spec.GetSymbolFileSpec().GetFilename().AsCString());
+
+  Status err;
+
+  if (CheckLocalSharedCache()) {
+    // When debugging on the host, we are most likely using the same shared
+    // cache as our inferior. The dylibs from the shared cache might not
+    // exist on the filesystem, so let's use the images in our own memory
+    // to create the modules.
+
+    // Check if the requested image is in our shared cache.
+    SharedCacheImageInfo image_info =
+        HostInfo::GetSharedCacheImageInfo(module_spec.GetFileSpec().GetPath());
+
+    // If we found it and it has the correct UUID, let's proceed with
+    // creating a module from the memory contents.
+    if (image_info.uuid &&
+        (!module_spec.GetUUID() || module_spec.GetUUID() == image_info.uuid)) {
+      ModuleSpec shared_cache_spec(module_spec.GetFileSpec(), image_info.uuid,
+                                   image_info.data_sp);
+      err = ModuleList::GetSharedModule(shared_cache_spec, module_sp,
+                                        module_search_paths_ptr, old_modules,
+                                        did_create_ptr);
+      if (module_sp) {
+        LLDB_LOGF(log, "[%s] module %s was found in the in-memory shared cache",
+                  (IsHost() ? "host" : "remote"),
+                  module_spec.GetFileSpec().GetPath().c_str());
+        return err;
+      }
+    }
+
+    // We failed to find the module in our shared cache. Let's see if we have a
+    // copy in our device support directory.
+    FileSpec device_support_spec(GetDeviceSupportDirectoryForOSVersion());
+    device_support_spec.AppendPathComponent("Symbols");
+    device_support_spec.AppendPathComponent(
+        module_spec.GetFileSpec().GetPath());
+    FileSystem::Instance().Resolve(device_support_spec);
+    if (FileSystem::Instance().Exists(device_support_spec)) {
+      ModuleSpec local_spec(device_support_spec, module_spec.GetUUID());
+      err = ModuleList::GetSharedModule(local_spec, module_sp,
+                                        module_search_paths_ptr, old_modules,
+                                        did_create_ptr);
+      if (module_sp) {
+        LLDB_LOGF(log,
+                  "[%s] module %s was found in Device Support "
+                  "directory: %s",
+                  (IsHost() ? "host" : "remote"),
+                  module_spec.GetFileSpec().GetPath().c_str(),
+                  local_spec.GetFileSpec().GetPath().c_str());
+        return err;
+      }
+    }
+  }
+
+  err = ModuleList::GetSharedModule(module_spec, module_sp,
+                                    module_search_paths_ptr, old_modules,
+                                    did_create_ptr);
+  if (module_sp)
+    return err;
+
+  if (!IsHost()) {
+    std::string cache_path(GetLocalCacheDirectory());
+    // Only search for a locally cached file if we have a valid cache path
+    if (!cache_path.empty()) {
+      std::string module_path(module_spec.GetFileSpec().GetPath());
+      cache_path.append(module_path);
+      FileSpec module_cache_spec(cache_path);
+
+      // if rsync is supported, always bring in the file - rsync will be very
+      // efficient when files are the same on the local and remote end of the
+      // connection
+      if (this->GetSupportsRSync()) {
+        err = BringInRemoteFile(this, module_spec, module_cache_spec);
+        if (err.Fail())
+          return err;
+        if (FileSystem::Instance().Exists(module_cache_spec)) {
+          Log *log = GetLog(LLDBLog::Platform);
+          LLDB_LOGF(log, "[%s] module %s/%s was rsynced and is now there",
+                    (IsHost() ? "host" : "remote"),
+                    module_spec.GetFileSpec().GetDirectory().AsCString(),
+                    module_spec.GetFileSpec().GetFilename().AsCString());
+          ModuleSpec local_spec(module_cache_spec,
+                                module_spec.GetArchitecture());
+          module_sp = std::make_shared<Module>(local_spec);
+          module_sp->SetPlatformFileSpec(module_spec.GetFileSpec());
+          return Status();
+        }
+      }
+
+      // try to find the module in the cache
+      if (FileSystem::Instance().Exists(module_cache_spec)) {
+        // get the local and remote MD5 and compare
+        if (m_remote_platform_sp) {
+          // when going over the *slow* GDB remote transfer mechanism we first
+          // check the hashes of the files - and only do the actual transfer if
+          // they differ
+          uint64_t high_local, high_remote, low_local, low_remote;
+          auto MD5 = llvm::sys::fs::md5_contents(module_cache_spec.GetPath());
+          if (!MD5)
+            return Status(MD5.getError());
+          std::tie(high_local, low_local) = MD5->words();
+
+          m_remote_platform_sp->CalculateMD5(module_spec.GetFileSpec(),
+                                             low_remote, high_remote);
+          if (low_local != low_remote || high_local != high_remote) {
+            // bring in the remote file
+            Log *log = GetLog(LLDBLog::Platform);
+            LLDB_LOGF(log,
+                      "[%s] module %s/%s needs to be replaced from remote copy",
+                      (IsHost() ? "host" : "remote"),
+                      module_spec.GetFileSpec().GetDirectory().AsCString(),
+                      module_spec.GetFileSpec().GetFilename().AsCString());
+            Status err =
+                BringInRemoteFile(this, module_spec, module_cache_spec);
+            if (err.Fail())
+              return err;
+          }
+        }
+
+        ModuleSpec local_spec(module_cache_spec, module_spec.GetArchitecture());
+        module_sp = std::make_shared<Module>(local_spec);
+        module_sp->SetPlatformFileSpec(module_spec.GetFileSpec());
+        Log *log = GetLog(LLDBLog::Platform);
+        LLDB_LOGF(log, "[%s] module %s/%s was found in the cache",
+                  (IsHost() ? "host" : "remote"),
+                  module_spec.GetFileSpec().GetDirectory().AsCString(),
+                  module_spec.GetFileSpec().GetFilename().AsCString());
+        return Status();
+      }
+
+      // bring in the remote module file
+      LLDB_LOGF(log, "[%s] module %s/%s needs to come in remotely",
+                (IsHost() ? "host" : "remote"),
+                module_spec.GetFileSpec().GetDirectory().AsCString(),
+                module_spec.GetFileSpec().GetFilename().AsCString());
+      Status err = BringInRemoteFile(this, module_spec, module_cache_spec);
+      if (err.Fail())
+        return err;
+      if (FileSystem::Instance().Exists(module_cache_spec)) {
+        Log *log = GetLog(LLDBLog::Platform);
+        LLDB_LOGF(log, "[%s] module %s/%s is now cached and fine",
+                  (IsHost() ? "host" : "remote"),
+                  module_spec.GetFileSpec().GetDirectory().AsCString(),
+                  module_spec.GetFileSpec().GetFilename().AsCString());
+        ModuleSpec local_spec(module_cache_spec, module_spec.GetArchitecture());
+        module_sp = std::make_shared<Module>(local_spec);
+        module_sp->SetPlatformFileSpec(module_spec.GetFileSpec());
+        return Status();
+      } else
+        return Status("unable to obtain valid module file");
+    } else
+      return Status("no cache path");
+  } else
+    return Status("unable to resolve module");
+}
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.h
new file mode 100644
index 0000000..e1eba08f
--- /dev/null
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.h
@@ -0,0 +1,67 @@
+//===-- PlatformDarwinDevice.h ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMDARWINDEVICE_H
+#define LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMDARWINDEVICE_H
+
+#include "PlatformDarwin.h"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <string>
+
+namespace lldb_private {
+
+/// Abstract Darwin platform with a potential device support directory.
+class PlatformDarwinDevice : public PlatformDarwin {
+public:
+  using PlatformDarwin::PlatformDarwin;
+  ~PlatformDarwinDevice() override;
+
+protected:
+  virtual Status GetSharedModuleWithLocalCache(
+      const ModuleSpec &module_spec, lldb::ModuleSP &module_sp,
+      const FileSpecList *module_search_paths_ptr,
+      llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules, bool *did_create_ptr);
+
+  struct SDKDirectoryInfo {
+    SDKDirectoryInfo(const FileSpec &sdk_dir_spec);
+    FileSpec directory;
+    ConstString build;
+    llvm::VersionTuple version;
+    bool user_cached;
+  };
+
+  typedef std::vector<SDKDirectoryInfo> SDKDirectoryInfoCollection;
+
+  bool UpdateSDKDirectoryInfosIfNeeded();
+
+  const SDKDirectoryInfo *GetSDKDirectoryForLatestOSVersion();
+  const SDKDirectoryInfo *GetSDKDirectoryForCurrentOSVersion();
+
+  static FileSystem::EnumerateDirectoryResult
+  GetContainedFilesIntoVectorOfStringsCallback(void *baton,
+                                               llvm::sys::fs::file_type ft,
+                                               llvm::StringRef path);
+
+  const char *GetDeviceSupportDirectory();
+  const char *GetDeviceSupportDirectoryForOSVersion();
+
+  virtual llvm::StringRef GetPlatformName() = 0;
+  virtual llvm::StringRef GetDeviceSupportDirectoryName() = 0;
+
+  std::mutex m_sdk_dir_mutex;
+  SDKDirectoryInfoCollection m_sdk_directory_infos;
+
+private:
+  std::string m_device_support_directory;
+  std::string m_device_support_directory_for_os_version;
+};
+} // namespace lldb_private
+
+#endif // LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMDARWINDEVICE_H
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp
index e48036eba..6579558 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp
@@ -96,7 +96,7 @@ PlatformSP PlatformMacOSX::CreateInstance(bool force, const ArchSpec *arch) {
 }
 
 /// Default Constructor
-PlatformMacOSX::PlatformMacOSX() : PlatformDarwin(true) {}
+PlatformMacOSX::PlatformMacOSX() : PlatformDarwinDevice(true) {}
 
 ConstString PlatformMacOSX::GetSDKDirectory(lldb_private::Target &target) {
   ModuleSP exe_module_sp(target.GetExecutableModule());
@@ -211,3 +211,9 @@ lldb_private::Status PlatformMacOSX::GetSharedModule(
   }
   return error;
 }
+
+llvm::StringRef PlatformMacOSX::GetDeviceSupportDirectoryName() {
+  return "macOS DeviceSupport";
+}
+
+llvm::StringRef PlatformMacOSX::GetPlatformName() { return "MacOSX.platform"; }
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h
index 2843a8a..be84485 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h
@@ -9,7 +9,7 @@
 #ifndef LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMMACOSX_H
 #define LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMMACOSX_H
 
-#include "PlatformDarwin.h"
+#include "PlatformDarwinDevice.h"
 #include "lldb/Target/Platform.h"
 #include "lldb/Utility/ConstString.h"
 #include "lldb/Utility/Status.h"
@@ -28,7 +28,7 @@ class ModuleSpec;
 class Process;
 class Target;
 
-class PlatformMacOSX : public PlatformDarwin {
+class PlatformMacOSX : public PlatformDarwinDevice {
 public:
   PlatformMacOSX();
 
@@ -69,6 +69,10 @@ public:
     return PlatformDarwin::AddClangModuleCompilationOptionsForSDKType(
         target, options, XcodeSDK::Type::MacOSX);
   }
+
+protected:
+  llvm::StringRef GetDeviceSupportDirectoryName() override;
+  llvm::StringRef GetPlatformName() override;
 };
 
 } // namespace lldb_private
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp
index be03cdbd5..7dd8c38 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp
@@ -38,9 +38,7 @@ PlatformRemoteDarwinDevice::SDKDirectoryInfo::SDKDirectoryInfo(
 
 /// Default Constructor
 PlatformRemoteDarwinDevice::PlatformRemoteDarwinDevice()
-    : PlatformDarwin(false), // This is a remote platform
-      m_sdk_directory_infos(), m_device_support_directory(),
-      m_device_support_directory_for_os_version(), m_build_update() {}
+    : PlatformDarwinDevice(false) {} // This is a remote platform
 
 /// Destructor.
 ///
@@ -129,278 +127,6 @@ Status PlatformRemoteDarwinDevice::ResolveExecutable(
   return error;
 }
 
-FileSystem::EnumerateDirectoryResult
-PlatformRemoteDarwinDevice::GetContainedFilesIntoVectorOfStringsCallback(
-    void *baton, llvm::sys::fs::file_type ft, llvm::StringRef path) {
-  ((PlatformRemoteDarwinDevice::SDKDirectoryInfoCollection *)baton)
-      ->push_back(PlatformRemoteDarwinDevice::SDKDirectoryInfo(FileSpec(path)));
-  return FileSystem::eEnumerateDirectoryResultNext;
-}
-
-bool PlatformRemoteDarwinDevice::UpdateSDKDirectoryInfosIfNeeded() {
-  Log *log = GetLog(LLDBLog::Host);
-  std::lock_guard<std::mutex> guard(m_sdk_dir_mutex);
-  if (m_sdk_directory_infos.empty()) {
-    // A --sysroot option was supplied - add it to our list of SDKs to check
-    if (m_sdk_sysroot) {
-      FileSpec sdk_sysroot_fspec(m_sdk_sysroot.GetCString());
-      FileSystem::Instance().Resolve(sdk_sysroot_fspec);
-      const SDKDirectoryInfo sdk_sysroot_directory_info(sdk_sysroot_fspec);
-      m_sdk_directory_infos.push_back(sdk_sysroot_directory_info);
-      if (log) {
-        LLDB_LOGF(
-            log,
-            "PlatformRemoteDarwinDevice::UpdateSDKDirectoryInfosIfNeeded added "
-            "--sysroot SDK directory %s",
-            m_sdk_sysroot.GetCString());
-      }
-      return true;
-    }
-    const char *device_support_dir = GetDeviceSupportDirectory();
-    if (log) {
-      LLDB_LOGF(
-          log,
-          "PlatformRemoteDarwinDevice::UpdateSDKDirectoryInfosIfNeeded Got "
-          "DeviceSupport directory %s",
-          device_support_dir);
-    }
-    if (device_support_dir) {
-      const bool find_directories = true;
-      const bool find_files = false;
-      const bool find_other = false;
-
-      SDKDirectoryInfoCollection builtin_sdk_directory_infos;
-      FileSystem::Instance().EnumerateDirectory(
-          m_device_support_directory, find_directories, find_files, find_other,
-          GetContainedFilesIntoVectorOfStringsCallback,
-          &builtin_sdk_directory_infos);
-
-      // Only add SDK directories that have symbols in them, some SDKs only
-      // contain developer disk images and no symbols, so they aren't useful to
-      // us.
-      FileSpec sdk_symbols_symlink_fspec;
-      for (const auto &sdk_directory_info : builtin_sdk_directory_infos) {
-        sdk_symbols_symlink_fspec = sdk_directory_info.directory;
-        sdk_symbols_symlink_fspec.AppendPathComponent("Symbols");
-        if (FileSystem::Instance().Exists(sdk_symbols_symlink_fspec)) {
-          m_sdk_directory_infos.push_back(sdk_directory_info);
-          if (log) {
-            LLDB_LOGF(
-                log,
-                "PlatformRemoteDarwinDevice::UpdateSDKDirectoryInfosIfNeeded "
-                "added builtin SDK directory %s",
-                sdk_symbols_symlink_fspec.GetPath().c_str());
-          }
-        }
-      }
-
-      const uint32_t num_installed = m_sdk_directory_infos.size();
-      llvm::StringRef dirname = GetDeviceSupportDirectoryName();
-      std::string local_sdk_cache_str = "~/Library/Developer/Xcode/";
-      local_sdk_cache_str += std::string(dirname);
-      FileSpec local_sdk_cache(local_sdk_cache_str.c_str());
-      FileSystem::Instance().Resolve(local_sdk_cache);
-      if (FileSystem::Instance().Exists(local_sdk_cache)) {
-        if (log) {
-          LLDB_LOGF(
-              log,
-              "PlatformRemoteDarwinDevice::UpdateSDKDirectoryInfosIfNeeded "
-              "searching %s for additional SDKs",
-              local_sdk_cache.GetPath().c_str());
-        }
-        char path[PATH_MAX];
-        if (local_sdk_cache.GetPath(path, sizeof(path))) {
-          FileSystem::Instance().EnumerateDirectory(
-              path, find_directories, find_files, find_other,
-              GetContainedFilesIntoVectorOfStringsCallback,
-              &m_sdk_directory_infos);
-          const uint32_t num_sdk_infos = m_sdk_directory_infos.size();
-          // First try for an exact match of major, minor and update
-          for (uint32_t i = num_installed; i < num_sdk_infos; ++i) {
-            m_sdk_directory_infos[i].user_cached = true;
-            if (log) {
-              LLDB_LOGF(log,
-                        "PlatformRemoteDarwinDevice::"
-                        "UpdateSDKDirectoryInfosIfNeeded "
-                        "user SDK directory %s",
-                        m_sdk_directory_infos[i].directory.GetPath().c_str());
-            }
-          }
-        }
-      }
-
-      const char *addtional_platform_dirs = getenv("PLATFORM_SDK_DIRECTORY");
-      if (addtional_platform_dirs) {
-        SDKDirectoryInfoCollection env_var_sdk_directory_infos;
-        FileSystem::Instance().EnumerateDirectory(
-            addtional_platform_dirs, find_directories, find_files, find_other,
-            GetContainedFilesIntoVectorOfStringsCallback,
-            &env_var_sdk_directory_infos);
-        FileSpec sdk_symbols_symlink_fspec;
-        for (const auto &sdk_directory_info : env_var_sdk_directory_infos) {
-          sdk_symbols_symlink_fspec = sdk_directory_info.directory;
-          sdk_symbols_symlink_fspec.AppendPathComponent("Symbols");
-          if (FileSystem::Instance().Exists(sdk_symbols_symlink_fspec)) {
-            m_sdk_directory_infos.push_back(sdk_directory_info);
-            if (log) {
-              LLDB_LOGF(
-                  log,
-                  "PlatformRemoteDarwinDevice::UpdateSDKDirectoryInfosIfNeeded "
-                  "added env var SDK directory %s",
-                  sdk_symbols_symlink_fspec.GetPath().c_str());
-            }
-          }
-        }
-      }
-
-    }
-  }
-  return !m_sdk_directory_infos.empty();
-}
-
-const PlatformRemoteDarwinDevice::SDKDirectoryInfo *
-PlatformRemoteDarwinDevice::GetSDKDirectoryForCurrentOSVersion() {
-  uint32_t i;
-  if (UpdateSDKDirectoryInfosIfNeeded()) {
-    const uint32_t num_sdk_infos = m_sdk_directory_infos.size();
-
-    // Check to see if the user specified a build string. If they did, then be
-    // sure to match it.
-    std::vector<bool> check_sdk_info(num_sdk_infos, true);
-    ConstString build(m_sdk_build);
-    if (build) {
-      for (i = 0; i < num_sdk_infos; ++i)
-        check_sdk_info[i] = m_sdk_directory_infos[i].build == build;
-    }
-
-    // If we are connected we can find the version of the OS the platform us
-    // running on and select the right SDK
-    llvm::VersionTuple version = GetOSVersion();
-    if (!version.empty()) {
-      if (UpdateSDKDirectoryInfosIfNeeded()) {
-        // First try for an exact match of major, minor and update
-        for (i = 0; i < num_sdk_infos; ++i) {
-          if (check_sdk_info[i]) {
-            if (m_sdk_directory_infos[i].version == version)
-              return &m_sdk_directory_infos[i];
-          }
-        }
-        // First try for an exact match of major and minor
-        for (i = 0; i < num_sdk_infos; ++i) {
-          if (check_sdk_info[i]) {
-            if (m_sdk_directory_infos[i].version.getMajor() ==
-                    version.getMajor() &&
-                m_sdk_directory_infos[i].version.getMinor() ==
-                    version.getMinor()) {
-              return &m_sdk_directory_infos[i];
-            }
-          }
-        }
-        // Lastly try to match of major version only..
-        for (i = 0; i < num_sdk_infos; ++i) {
-          if (check_sdk_info[i]) {
-            if (m_sdk_directory_infos[i].version.getMajor() ==
-                version.getMajor()) {
-              return &m_sdk_directory_infos[i];
-            }
-          }
-        }
-      }
-    } else if (build) {
-      // No version, just a build number, search for the first one that matches
-      for (i = 0; i < num_sdk_infos; ++i)
-        if (check_sdk_info[i])
-          return &m_sdk_directory_infos[i];
-    }
-  }
-  return nullptr;
-}
-
-const PlatformRemoteDarwinDevice::SDKDirectoryInfo *
-PlatformRemoteDarwinDevice::GetSDKDirectoryForLatestOSVersion() {
-  const PlatformRemoteDarwinDevice::SDKDirectoryInfo *result = nullptr;
-  if (UpdateSDKDirectoryInfosIfNeeded()) {
-    auto max = std::max_element(
-        m_sdk_directory_infos.begin(), m_sdk_directory_infos.end(),
-        [](const SDKDirectoryInfo &a, const SDKDirectoryInfo &b) {
-          return a.version < b.version;
-        });
-    if (max != m_sdk_directory_infos.end())
-      result = &*max;
-  }
-  return result;
-}
-
-const char *PlatformRemoteDarwinDevice::GetDeviceSupportDirectory() {
-  std::string platform_dir =
-      ("/Platforms/" + GetPlatformName() + "/DeviceSupport").str();
-  if (m_device_support_directory.empty()) {
-    if (FileSpec fspec = HostInfo::GetXcodeDeveloperDirectory()) {
-      m_device_support_directory = fspec.GetPath();
-      m_device_support_directory.append(platform_dir.c_str());
-    } else {
-      // Assign a single NULL character so we know we tried to find the device
-      // support directory and we don't keep trying to find it over and over.
-      m_device_support_directory.assign(1, '\0');
-    }
-  }
-  // We should have put a single NULL character into m_device_support_directory
-  // or it should have a valid path if the code gets here
-  assert(m_device_support_directory.empty() == false);
-  if (m_device_support_directory[0])
-    return m_device_support_directory.c_str();
-  return nullptr;
-}
-
-const char *PlatformRemoteDarwinDevice::GetDeviceSupportDirectoryForOSVersion() {
-  if (m_sdk_sysroot)
-    return m_sdk_sysroot.GetCString();
-
-  if (m_device_support_directory_for_os_version.empty()) {
-    const PlatformRemoteDarwinDevice::SDKDirectoryInfo *sdk_dir_info =
-        GetSDKDirectoryForCurrentOSVersion();
-    if (sdk_dir_info == nullptr)
-      sdk_dir_info = GetSDKDirectoryForLatestOSVersion();
-    if (sdk_dir_info) {
-      char path[PATH_MAX];
-      if (sdk_dir_info->directory.GetPath(path, sizeof(path))) {
-        m_device_support_directory_for_os_version = path;
-        return m_device_support_directory_for_os_version.c_str();
-      }
-    } else {
-      // Assign a single NULL character so we know we tried to find the device
-      // support directory and we don't keep trying to find it over and over.
-      m_device_support_directory_for_os_version.assign(1, '\0');
-    }
-  }
-  // We should have put a single NULL character into
-  // m_device_support_directory_for_os_version or it should have a valid path
-  // if the code gets here
-  assert(m_device_support_directory_for_os_version.empty() == false);
-  if (m_device_support_directory_for_os_version[0])
-    return m_device_support_directory_for_os_version.c_str();
-  return nullptr;
-}
-
-uint32_t PlatformRemoteDarwinDevice::FindFileInAllSDKs(const char *platform_file_path,
-                                              FileSpecList &file_list) {
-  Log *log = GetLog(LLDBLog::Host);
-  if (platform_file_path && platform_file_path[0] &&
-      UpdateSDKDirectoryInfosIfNeeded()) {
-    const uint32_t num_sdk_infos = m_sdk_directory_infos.size();
-    lldb_private::FileSpec local_file;
-    // First try for an exact match of major, minor and update
-    for (uint32_t sdk_idx = 0; sdk_idx < num_sdk_infos; ++sdk_idx) {
-      LLDB_LOGV(log, "Searching for {0} in sdk path {1}", platform_file_path,
-                m_sdk_directory_infos[sdk_idx].directory);
-      if (GetFileInSDK(platform_file_path, sdk_idx, local_file)) {
-        file_list.Append(local_file);
-      }
-    }
-  }
-  return file_list.GetSize();
-}
-
 bool PlatformRemoteDarwinDevice::GetFileInSDK(const char *platform_file_path,
                                      uint32_t sdk_idx,
                                      lldb_private::FileSpec &local_file) {
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.h
index 02cb09d..c604866 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.h
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.h
@@ -1,5 +1,4 @@
-//===-- PlatformRemoteDarwinDevice.h -------------------------------------*- C++
-//-*-===//
+//===-- PlatformRemoteDarwinDevice.h ----------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -10,7 +9,7 @@
 #ifndef LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMREMOTEDARWINDEVICE_H
 #define LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMREMOTEDARWINDEVICE_H
 
-#include "PlatformDarwin.h"
+#include "PlatformDarwinDevice.h"
 #include "lldb/Host/FileSystem.h"
 #include "lldb/Utility/ConstString.h"
 #include "lldb/Utility/FileSpec.h"
@@ -34,7 +33,7 @@ class Stream;
 class Target;
 class UUID;
 
-class PlatformRemoteDarwinDevice : public PlatformDarwin {
+class PlatformRemoteDarwinDevice : public PlatformDarwinDevice {
 public:
   PlatformRemoteDarwinDevice();
 
@@ -64,57 +63,19 @@ public:
   }
 
 protected:
-  struct SDKDirectoryInfo {
-    SDKDirectoryInfo(const FileSpec &sdk_dir_spec);
-    FileSpec directory;
-    ConstString build;
-    llvm::VersionTuple version;
-    bool user_cached;
-  };
-
-  typedef std::vector<SDKDirectoryInfo> SDKDirectoryInfoCollection;
-
-  std::mutex m_sdk_dir_mutex;
-  SDKDirectoryInfoCollection m_sdk_directory_infos;
-  std::string m_device_support_directory;
-  std::string m_device_support_directory_for_os_version;
   std::string m_build_update;
   uint32_t m_last_module_sdk_idx = UINT32_MAX;
   uint32_t m_connected_module_sdk_idx = UINT32_MAX;
 
-  bool UpdateSDKDirectoryInfosIfNeeded();
-
-  const char *GetDeviceSupportDirectory();
-
-  const char *GetDeviceSupportDirectoryForOSVersion();
-
-  const SDKDirectoryInfo *GetSDKDirectoryForLatestOSVersion();
-
-  const SDKDirectoryInfo *GetSDKDirectoryForCurrentOSVersion();
-
-  static FileSystem::EnumerateDirectoryResult
-  GetContainedFilesIntoVectorOfStringsCallback(void *baton,
-                                               llvm::sys::fs::file_type ft,
-                                               llvm::StringRef path);
-
-  uint32_t FindFileInAllSDKs(const char *platform_file_path,
-                             FileSpecList &file_list);
-
   bool GetFileInSDK(const char *platform_file_path, uint32_t sdk_idx,
                     FileSpec &local_file);
 
-  uint32_t FindFileInAllSDKs(const FileSpec &platform_file,
-                             FileSpecList &file_list);
-
   uint32_t GetConnectedSDKIndex();
 
   // Get index of SDK in SDKDirectoryInfoCollection by its pointer and return
   // UINT32_MAX if that SDK not found.
   uint32_t GetSDKIndexBySDKDirectoryInfo(const SDKDirectoryInfo *sdk_info);
 
-  virtual llvm::StringRef GetDeviceSupportDirectoryName() = 0;
-  virtual llvm::StringRef GetPlatformName() = 0;
-
 private:
   PlatformRemoteDarwinDevice(const PlatformRemoteDarwinDevice &) = delete;
   const PlatformRemoteDarwinDevice &
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp
index 9964379..460f6d8 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp
@@ -70,8 +70,8 @@ PlatformSP PlatformRemoteMacOSX::CreateInstance(bool force,
     const char *triple_cstr =
         arch ? arch->GetTriple().getTriple().c_str() : "<null>";
 
-    LLDB_LOGF(log, "PlatformMacOSX::%s(force=%s, arch={%s,%s})", __FUNCTION__,
-              force ? "true" : "false", arch_name, triple_cstr);
+    LLDB_LOGF(log, "PlatformRemoteMacOSX::%s(force=%s, arch={%s,%s})",
+              __FUNCTION__, force ? "true" : "false", arch_name, triple_cstr);
   }
 
   bool create = force;
@@ -141,52 +141,6 @@ PlatformRemoteMacOSX::GetSupportedArchitectures(const ArchSpec &host_info) {
   return result;
 }
 
-lldb_private::Status PlatformRemoteMacOSX::GetFileWithUUID(
-    const lldb_private::FileSpec &platform_file,
-    const lldb_private::UUID *uuid_ptr, lldb_private::FileSpec &local_file) {
-  if (m_remote_platform_sp) {
-    std::string local_os_build;
-#if !defined(__linux__)
-    local_os_build = HostInfo::GetOSBuildString().getValueOr("");
-#endif
-    llvm::Optional<std::string> remote_os_build =
-        m_remote_platform_sp->GetOSBuildString();
-    if (local_os_build == remote_os_build) {
-      // same OS version: the local file is good enough
-      local_file = platform_file;
-      return Status();
-    } else {
-      // try to find the file in the cache
-      std::string cache_path(GetLocalCacheDirectory());
-      std::string module_path(platform_file.GetPath());
-      cache_path.append(module_path);
-      FileSpec module_cache_spec(cache_path);
-      if (FileSystem::Instance().Exists(module_cache_spec)) {
-        local_file = module_cache_spec;
-        return Status();
-      }
-      // bring in the remote module file
-      FileSpec module_cache_folder =
-          module_cache_spec.CopyByRemovingLastPathComponent();
-      // try to make the local directory first
-      Status err(
-          llvm::sys::fs::create_directory(module_cache_folder.GetPath()));
-      if (err.Fail())
-        return err;
-      err = GetFile(platform_file, module_cache_spec);
-      if (err.Fail())
-        return err;
-      if (FileSystem::Instance().Exists(module_cache_spec)) {
-        local_file = module_cache_spec;
-        return Status();
-      } else
-        return Status("unable to obtain valid module file");
-    }
-  }
-  local_file = platform_file;
-  return Status();
-}
-
 llvm::StringRef PlatformRemoteMacOSX::GetDescriptionStatic() {
   return "Remote Mac OS X user platform plug-in.";
 }
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h
index d84b5c03..17ae372 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h
@@ -41,9 +41,6 @@ public:
 
   llvm::StringRef GetDescription() override { return GetDescriptionStatic(); }
 
-  Status GetFileWithUUID(const FileSpec &platform_file, const UUID *uuid_ptr,
-                         FileSpec &local_file) override;
-
   std::vector<ArchSpec>
   GetSupportedArchitectures(const ArchSpec &process_host_arch) override;
 
diff --git a/lldb/source/Plugins/Process/Linux/CMakeLists.txt b/lldb/source/Plugins/Process/Linux/CMakeLists.txt
index cc70edb..125cc0e 100644
--- a/lldb/source/Plugins/Process/Linux/CMakeLists.txt
+++ b/lldb/source/Plugins/Process/Linux/CMakeLists.txt
@@ -9,6 +9,7 @@ add_lldb_library(lldbPluginProcessLinux
   NativeRegisterContextLinux_x86_64.cpp
   NativeThreadLinux.cpp
   Perf.cpp
+  Procfs.cpp
   SingleStepCheck.cpp
 
   LINK_LIBS
diff --git a/lldb/source/Plugins/Process/Linux/IntelPTCollector.cpp b/lldb/source/Plugins/Process/Linux/IntelPTCollector.cpp
index 8c10828..e2303b3 100644
--- a/lldb/source/Plugins/Process/Linux/IntelPTCollector.cpp
+++ b/lldb/source/Plugins/Process/Linux/IntelPTCollector.cpp
@@ -9,6 +9,7 @@
 #include "IntelPTCollector.h"
 
 #include "Perf.h"
+#include "Procfs.h"
 
 #include "Plugins/Process/POSIX/ProcessPOSIXLog.h"
 #include "lldb/Host/linux/Support.h"
@@ -57,21 +58,6 @@ enum IntelPTConfigFileType {
   BitOffset
 };
 
-/// Get the content of /proc/cpuinfo that can be later used to decode traces.
-static Expected<ArrayRef<uint8_t>> GetCPUInfo() {
-  static llvm::Optional<std::vector<uint8_t>> cpu_info;
-  if (!cpu_info) {
-    auto buffer_or_error = errorOrToExpected(getProcFile("cpuinfo"));
-    if (!buffer_or_error)
-      return buffer_or_error.takeError();
-    MemoryBuffer &buffer = **buffer_or_error;
-    cpu_info = std::vector<uint8_t>(
-        reinterpret_cast<const uint8_t *>(buffer.getBufferStart()),
-        reinterpret_cast<const uint8_t *>(buffer.getBufferEnd()));
-  }
-  return *cpu_info;
-}
-
 static Expected<uint32_t> ReadIntelPTConfigFile(const char *file,
                                                 IntelPTConfigFileType type) {
   ErrorOr<std::unique_ptr<MemoryBuffer>> stream =
@@ -430,7 +416,7 @@ void IntelPTThreadTrace::ReadCyclicBuffer(llvm::MutableArrayRef<uint8_t> &dst,
 
 TraceThreadState IntelPTThreadTrace::GetState() const {
   return {static_cast<int64_t>(m_tid),
-          {TraceBinaryData{"threadTraceBuffer",
+          {TraceBinaryData{IntelPTDataKinds::kThreadTraceBuffer,
                            static_cast<int64_t>(GetTraceBufferSize())}}};
 }
 
@@ -592,13 +578,13 @@ Error IntelPTCollector::OnThreadDestroyed(lldb::tid_t tid) {
 }
 
 Expected<json::Value> IntelPTCollector::GetState() const {
-  Expected<ArrayRef<uint8_t>> cpu_info = GetCPUInfo();
+  Expected<ArrayRef<uint8_t>> cpu_info = GetProcfsCpuInfo();
   if (!cpu_info)
     return cpu_info.takeError();
 
   TraceGetStateResponse state;
-  state.processBinaryData.push_back(
-      {"cpuInfo", static_cast<int64_t>(cpu_info->size())});
+  state.processBinaryData.push_back({IntelPTDataKinds::kProcFsCpuInfo,
+                                     static_cast<int64_t>(cpu_info->size())});
 
   std::vector<TraceThreadState> thread_states =
       m_thread_traces.GetThreadStates();
@@ -622,14 +608,14 @@ IntelPTCollector::GetTracedThread(lldb::tid_t tid) const {
 
 Expected<std::vector<uint8_t>>
 IntelPTCollector::GetBinaryData(const TraceGetBinaryDataRequest &request) const {
-  if (request.kind == "threadTraceBuffer") {
+  if (request.kind == IntelPTDataKinds::kThreadTraceBuffer) {
     if (Expected<const IntelPTThreadTrace &> trace =
             GetTracedThread(*request.tid))
       return trace->GetIntelPTBuffer(request.offset, request.size);
     else
       return trace.takeError();
-  } else if (request.kind == "cpuInfo") {
-    return GetCPUInfo();
+  } else if (request.kind == IntelPTDataKinds::kProcFsCpuInfo) {
+    return GetProcfsCpuInfo();
   }
   return createStringError(inconvertibleErrorCode(),
                            "Unsuported trace binary data kind: %s",
diff --git a/lldb/source/Plugins/Process/Linux/Perf.cpp b/lldb/source/Plugins/Process/Linux/Perf.cpp
index ed65c61..d43cd12 100644
--- a/lldb/source/Plugins/Process/Linux/Perf.cpp
+++ b/lldb/source/Plugins/Process/Linux/Perf.cpp
@@ -8,8 +8,11 @@
 
 #include "Perf.h"
 
+#include "lldb/Host/linux/Support.h"
+
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
 
 #include <sys/mman.h>
 #include <sys/syscall.h>
diff --git a/lldb/source/Plugins/Process/Linux/Procfs.cpp b/lldb/source/Plugins/Process/Linux/Procfs.cpp
new file mode 100644
index 0000000..1ff7c568
--- /dev/null
+++ b/lldb/source/Plugins/Process/Linux/Procfs.cpp
@@ -0,0 +1,71 @@
+//===-- Procfs.cpp --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Procfs.h"
+
+#include "lldb/Host/linux/Support.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace lldb_private;
+using namespace process_linux;
+using namespace llvm;
+
+Expected<ArrayRef<uint8_t>> lldb_private::process_linux::GetProcfsCpuInfo() {
+  static Optional<std::vector<uint8_t>> cpu_info;
+  if (!cpu_info) {
+    auto buffer_or_error = errorOrToExpected(getProcFile("cpuinfo"));
+    if (!buffer_or_error)
+      return buffer_or_error.takeError();
+    MemoryBuffer &buffer = **buffer_or_error;
+    cpu_info = std::vector<uint8_t>(
+        reinterpret_cast<const uint8_t *>(buffer.getBufferStart()),
+        reinterpret_cast<const uint8_t *>(buffer.getBufferEnd()));
+  }
+  return *cpu_info;
+}
+
+Expected<std::vector<int>>
+lldb_private::process_linux::GetAvailableLogicalCoreIDs(StringRef cpuinfo) {
+  SmallVector<StringRef, 8> lines;
+  cpuinfo.split(lines, "\n", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+  std::vector<int> logical_cores;
+
+  for (StringRef line : lines) {
+    std::pair<StringRef, StringRef> key_value = line.split(':');
+    auto key = key_value.first.trim();
+    auto val = key_value.second.trim();
+    if (key == "processor") {
+      int processor;
+      if (val.getAsInteger(10, processor))
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Failed parsing the /proc/cpuinfo line entry: %s", line.data());
+      logical_cores.push_back(processor);
+    }
+  }
+  return logical_cores;
+}
+
+llvm::Expected<llvm::ArrayRef<int>>
+lldb_private::process_linux::GetAvailableLogicalCoreIDs() {
+  static Optional<std::vector<int>> logical_cores_ids;
+  if (!logical_cores_ids) {
+    // We find the actual list of core ids by parsing /proc/cpuinfo
+    Expected<ArrayRef<uint8_t>> cpuinfo = GetProcfsCpuInfo();
+    if (!cpuinfo)
+      return cpuinfo.takeError();
+
+    Expected<std::vector<int>> core_ids = GetAvailableLogicalCoreIDs(
+        StringRef(reinterpret_cast<const char *>(cpuinfo->data())));
+    if (!core_ids)
+      return core_ids.takeError();
+
+    logical_cores_ids.emplace(std::move(*core_ids));
+  }
+  return *logical_cores_ids;
+}
diff --git a/lldb/source/Plugins/Process/Linux/Procfs.h b/lldb/source/Plugins/Process/Linux/Procfs.h
index 59dd76a..297fafe 100644
--- a/lldb/source/Plugins/Process/Linux/Procfs.h
+++ b/lldb/source/Plugins/Process/Linux/Procfs.h
@@ -11,6 +11,10 @@
 
 #include <sys/ptrace.h>
 
+#include "llvm/Support/Error.h"
+
+#include <vector>
+
 #ifdef __ANDROID__
 #if defined(__arm64__) || defined(__aarch64__)
 typedef unsigned long elf_greg_t;
@@ -28,3 +32,24 @@ typedef struct user_fpsimd_state elf_fpregset_t;
 #else // __ANDROID__
 #include <sys/procfs.h>
 #endif // __ANDROID__
+
+namespace lldb_private {
+namespace process_linux {
+
+/// \return
+///     The content of /proc/cpuinfo and cache it if errors didn't happen.
+llvm::Expected<llvm::ArrayRef<uint8_t>> GetProcfsCpuInfo();
+
+/// \return
+///     A list of available logical core ids given the contents of
+///     /proc/cpuinfo.
+llvm::Expected<std::vector<int>>
+GetAvailableLogicalCoreIDs(llvm::StringRef cpuinfo);
+
+/// \return
+///     A list with all the logical cores available in the system and cache it
+///     if errors didn't happen.
+llvm::Expected<llvm::ArrayRef<int>> GetAvailableLogicalCoreIDs();
+
+} // namespace process_linux
+} // namespace lldb_private
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
index a5def79..394963a 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
@@ -81,7 +81,8 @@ TraceIntelPT::TraceIntelPT(
   for (const ThreadPostMortemTraceSP &thread : traced_threads) {
     m_thread_decoders.emplace(thread->GetID(),
                               std::make_unique<ThreadDecoder>(thread, *this));
-    SetPostMortemThreadDataFile(thread->GetID(), "threadTraceBuffer",
+    SetPostMortemThreadDataFile(thread->GetID(),
+                                IntelPTDataKinds::kThreadTraceBuffer,
                                 thread->GetTraceFile());
   }
 }
@@ -179,7 +180,8 @@ llvm::Expected<size_t> TraceIntelPT::GetRawTraceSize(Thread &thread) {
 }
 
 Expected<pt_cpu> TraceIntelPT::GetCPUInfoForLiveProcess() {
-  Expected<std::vector<uint8_t>> cpu_info = GetLiveProcessBinaryData("cpuInfo");
+  Expected<std::vector<uint8_t>> cpu_info =
+      GetLiveProcessBinaryData(IntelPTDataKinds::kProcFsCpuInfo);
   if (!cpu_info)
     return cpu_info.takeError();
 
@@ -393,7 +395,8 @@ Error TraceIntelPT::Start(llvm::ArrayRef<lldb::tid_t> tids,
 
 Error TraceIntelPT::OnThreadBufferRead(lldb::tid_t tid,
                                        OnBinaryDataReadCallback callback) {
-  return OnThreadBinaryDataRead(tid, "threadTraceBuffer", callback);
+  return OnThreadBinaryDataRead(tid, IntelPTDataKinds::kThreadTraceBuffer,
+                                callback);
 }
 
 TaskTimer &TraceIntelPT::GetTimer() { return m_task_timer; }
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp
index 241d9c5..7ed4292 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp
@@ -48,8 +48,8 @@ llvm::Error TraceIntelPTSessionSaver::SaveToDisk(TraceIntelPT &trace_ipt,
     return json_intel_pt_trace.takeError();
 
   llvm::Expected<JSONTraceSessionBase> json_session_description =
-      TraceSessionSaver::BuildProcessesSection(*live_process,
-                                               "threadTraceBuffer", directory);
+      TraceSessionSaver::BuildProcessesSection(
+          *live_process, IntelPTDataKinds::kThreadTraceBuffer, directory);
 
   if (!json_session_description)
     return json_session_description.takeError();
diff --git a/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp b/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp
index 5dd0261..bd450bb 100644
--- a/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp
+++ b/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp
@@ -13,6 +13,9 @@ using namespace llvm::json;
 
 namespace lldb_private {
 
+const char *IntelPTDataKinds::kProcFsCpuInfo = "procfsCpuInfo";
+const char *IntelPTDataKinds::kThreadTraceBuffer = "threadTraceBuffer";
+
 bool fromJSON(const json::Value &value, TraceIntelPTStartRequest &packet,
               Path path) {
   ObjectMapper o(value, path);
diff --git a/lldb/test/API/commands/help/TestHelp.py b/lldb/test/API/commands/help/TestHelp.py
index 5bd85dd..49f8341 100644
--- a/lldb/test/API/commands/help/TestHelp.py
+++ b/lldb/test/API/commands/help/TestHelp.py
@@ -243,3 +243,63 @@ class HelpCommandTestCase(TestBase):
                 "-f <format> ( --format <format> )", "The format to use for each of the value to be written.",
                 "-s <byte-size> ( --size <byte-size> )", "The size in bytes to write from input file or each value."])
 
+    @no_debug_info_test
+    def test_help_shows_optional_short_options(self):
+        """Test that optional short options are printed and that they are in
+           alphabetical order with upper case options first."""
+        self.expect("help memory read",
+                    substrs=["memory read [-br]", "memory read [-AFLORTr]"])
+        self.expect("help target modules lookup",
+                    substrs=["target modules lookup [-Airv]"])
+
+    @no_debug_info_test
+    def test_help_shows_command_options_usage(self):
+        """Test that we start the usage section with a specific line."""
+        self.expect("help memory read", substrs=["Command Options Usage:\n  memory read"])
+
+    @no_debug_info_test
+    def test_help_detailed_information_spacing(self):
+        """Test that we put a break between the usage and the options help lines,
+           and between the options themselves."""
+        self.expect("help memory read", substrs=[
+                    "[<address-expression>]\n\n       --show-tags",
+                    # Starts with the end of the show-tags line
+                    "output).\n\n       -A"])
+
+    @no_debug_info_test
+    def test_help_detailed_information_ordering(self):
+        """Test that we order options alphabetically, upper case first."""
+        # You could test this with a simple regex like:
+        # <upper case>.*<later upper case>.*<lower case>.*<later lower case>
+        # Except that that could pass sometimes even with shuffled output.
+        # This makes sure that doesn't happen.
+
+        self.runCmd("help memory read")
+        got = self.res.GetOutput()
+        _, options_lines = got.split("Command Options Usage:")
+        options_lines = options_lines.lstrip().splitlines()
+
+        # Skip over "memory read [-xyz] lines.
+        while("memory read" in options_lines[0]):
+            options_lines.pop(0)
+        # Plus the newline after that.
+        options_lines.pop(0)
+
+        short_options = []
+        for line in options_lines:
+            # Ignore line breaks and descriptions.
+            # (not stripping the line here in case some line of the descriptions
+            # happens to start with "-")
+            if not line or not line.startswith("       -"):
+                continue
+            # This apears at the end after the options.
+            if "This command takes options and free form arguments." in line:
+                break
+            line = line.strip()
+            # The order of -- only options is not enforced so ignore their position.
+            if not line.startswith("--"):
+                # Save its short char name.
+                short_options.append(line[1])
+
+        self.assertEqual(sorted(short_options), short_options,
+                         "Short option help displayed in an incorrect order!")
diff --git a/lldb/unittests/Process/Linux/CMakeLists.txt b/lldb/unittests/Process/Linux/CMakeLists.txt
index 30fa7e4..d7ad90e 100644
--- a/lldb/unittests/Process/Linux/CMakeLists.txt
+++ b/lldb/unittests/Process/Linux/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_lldb_unittest(ProcessLinuxTests
   IntelPTCollectorTests.cpp
   PerfTests.cpp
+  ProcfsTests.cpp
 
   LINK_LIBS
     lldbPluginProcessLinux
diff --git a/lldb/unittests/Process/Linux/PerfTests.cpp b/lldb/unittests/Process/Linux/PerfTests.cpp
index 084b857..fd9280b 100644
--- a/lldb/unittests/Process/Linux/PerfTests.cpp
+++ b/lldb/unittests/Process/Linux/PerfTests.cpp
@@ -13,6 +13,7 @@
 #include "llvm/Support/Error.h"
 
 #include "gtest/gtest.h"
+
 #include <chrono>
 #include <cstdint>
 
diff --git a/lldb/unittests/Process/Linux/ProcfsTests.cpp b/lldb/unittests/Process/Linux/ProcfsTests.cpp
new file mode 100644
index 0000000..f619234
--- /dev/null
+++ b/lldb/unittests/Process/Linux/ProcfsTests.cpp
@@ -0,0 +1,104 @@
+//===-- ProcfsTests.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Procfs.h"
+
+#include "lldb/Host/linux/Support.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using namespace lldb_private;
+using namespace process_linux;
+using namespace llvm;
+
+TEST(Perf, HardcodedLogicalCoreIDs) {
+  Expected<std::vector<int>> core_ids =
+      GetAvailableLogicalCoreIDs(R"(processor       : 13
+vendor_id       : GenuineIntel
+cpu family      : 6
+model           : 85
+model name      : Intel(R) Xeon(R) Gold 6138 CPU @ 2.00GHz
+stepping        : 4
+microcode       : 0x2000065
+cpu MHz         : 2886.370
+cache size      : 28160 KB
+physical id     : 1
+siblings        : 40
+core id         : 19
+cpu cores       : 20
+apicid          : 103
+initial apicid  : 103
+fpu             : yes
+fpu_exception   : yes
+cpuid level     : 22
+power management:
+
+processor       : 24
+vendor_id       : GenuineIntel
+cpu family      : 6
+model           : 85
+model name      : Intel(R) Xeon(R) Gold 6138 CPU @ 2.00GHz
+stepping        : 4
+microcode       : 0x2000065
+cpu MHz         : 2768.494
+cache size      : 28160 KB
+physical id     : 1
+siblings        : 40
+core id         : 20
+cpu cores       : 20
+apicid          : 105
+power management:
+
+processor       : 35
+vendor_id       : GenuineIntel
+cpu family      : 6
+model           : 85
+model name      : Intel(R) Xeon(R) Gold 6138 CPU @ 2.00GHz
+stepping        : 4
+microcode       : 0x2000065
+cpu MHz         : 2884.703
+cache size      : 28160 KB
+physical id     : 1
+siblings        : 40
+core id         : 24
+cpu cores       : 20
+apicid          : 113
+
+processor       : 79
+vendor_id       : GenuineIntel
+cpu family      : 6
+model           : 85
+model name      : Intel(R) Xeon(R) Gold 6138 CPU @ 2.00GHz
+stepping        : 4
+microcode       : 0x2000065
+cpu MHz         : 3073.955
+cache size      : 28160 KB
+physical id     : 1
+siblings        : 40
+core id         : 28
+cpu cores       : 20
+apicid          : 121
+power management:
+)");
+
+  ASSERT_TRUE((bool)core_ids);
+  ASSERT_THAT(*core_ids, ::testing::ElementsAre(13, 24, 35, 79));
+}
+
+TEST(Perf, RealLogicalCoreIDs) {
+  // We first check we can read /proc/cpuinfo
+  auto buffer_or_error = errorOrToExpected(getProcFile("cpuinfo"));
+  if (!buffer_or_error)
+    GTEST_SKIP() << toString(buffer_or_error.takeError());
+
+  // At this point we shouldn't fail parsing the core ids
+  Expected<ArrayRef<int>> core_ids = GetAvailableLogicalCoreIDs();
+  ASSERT_TRUE((bool)core_ids);
+  ASSERT_GT((int)core_ids->size(), 0) << "We must see at least one core";
+}
diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp
index 0fb7f46..361e529 100644
--- a/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp
@@ -67,7 +67,7 @@ static void EncodeDecode(const NameToDIE &object, ByteOrder byte_order) {
   NameToDIE decoded_object;
   offset_t data_offset = 0;
   decoded_object.Decode(data, &data_offset, strtab_reader);
-  EXPECT_TRUE(object == decoded_object);
+  EXPECT_EQ(object, decoded_object);
 }
 
 static void EncodeDecode(const NameToDIE &object) {
@@ -83,6 +83,7 @@ TEST(DWARFIndexCachingTest, NameToDIEEncodeDecode) {
              DIERef(llvm::None, DIERef::Section::DebugInfo, 0x11223344));
   map.Insert(ConstString("workd"),
              DIERef(100, DIERef::Section::DebugInfo, 0x11223344));
+  map.Finalize();
   // Make sure a valid NameToDIE map encodes and decodes correctly.
   EncodeDecode(map);
 }
diff --git a/llvm/docs/AdvancedBuilds.rst b/llvm/docs/AdvancedBuilds.rst
index 1781726..3edfca3 100644
--- a/llvm/docs/AdvancedBuilds.rst
+++ b/llvm/docs/AdvancedBuilds.rst
@@ -81,7 +81,7 @@ You can build an Apple Clang compiler using the following commands:
 
 .. code-block:: console
 
-  $ cmake -G Ninja -C <path to clang>/cmake/caches/Apple-stage1.cmake <path to source>
+  $ cmake -G Ninja -C <path to source>/clang/cmake/caches/Apple-stage1.cmake <path to source>
   $ ninja stage2-distribution
 
 This CMake invocation configures the stage1 host compiler, and sets
@@ -115,7 +115,7 @@ running:
 
 .. code-block:: console
 
-  $ cmake -G Ninja -C <path_to_clang>/cmake/caches/PGO.cmake <source dir>
+  $ cmake -G Ninja -C <path to source>/clang/cmake/caches/PGO.cmake <path to source>
   $ ninja stage2-instrumented-generate-profdata
 
 If you let that run for a few hours or so, it will place a profdata file in your
@@ -147,7 +147,7 @@ The PGO came cache generates the following additional targets:
 
 **stage2-instrumented-generate-profdata**
   Depends on "stage2-instrumented" and will use the instrumented compiler to
-  generate profdata based on the training files in <clang>/utils/perf-training
+  generate profdata based on the training files in clang/utils/perf-training
 
 **stage2**
   Depends of "stage2-instrumented-generate-profdata" and will use the stage1
@@ -185,7 +185,7 @@ following commands:
 
 .. code-block:: console
 
-  $ cmake -G Ninja -C <path_to_clang>/cmake/caches/3-stage.cmake <source dir>
+  $ cmake -G Ninja -C <path to source>/clang/cmake/caches/3-stage.cmake <path to source>
   $ cmake --build . --target stage3 --parallel
 
 After the build you can compare the stage2 & stage3 compilers.
diff --git a/llvm/docs/CodeOfConduct.rst b/llvm/docs/CodeOfConduct.rst
index d13a163..35ace83 100644
--- a/llvm/docs/CodeOfConduct.rst
+++ b/llvm/docs/CodeOfConduct.rst
@@ -143,7 +143,7 @@ The current committee members are:
 Transparency Reports
 ====================
 
-There are currently no transparency reports.
+* `April 28, 2022 <https://llvm.org/coc-reports/2022-04-28-report.html>`_
 
 For details about what a Transparency Report is and what it contains, please see the :doc:`Response Guide<ResponseGuide>`.
 
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 9812ce3..7e3a3b5 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1569,6 +1569,14 @@ example:
     epilogue, the backend should forcibly align the stack pointer.
     Specify the desired alignment, which must be a power of two, in
     parentheses.
+``"alloc-family"="FAMILY"``
+    This indicates which "family" an allocator function is part of. To avoid 
+    collisions, the family name should match the mangled name of the primary 
+    allocator function, that is "malloc" for malloc/calloc/realloc/free, 
+    "_Znwm" for ``::operator::new`` and ``::operator::delete``, and
+    "_ZnwmSt11align_val_t" for aligned ``::operator::new`` and
+    ``::operator::delete``. Matching malloc/realloc/free calls within a family
+    can be optimized, but mismatched ones will be left alone.
 ``allocsize(<EltSizeParam>[, <NumEltsParam>])``
     This attribute indicates that the annotated function will always return at
     least a given number of bytes (or null). Its arguments are zero-indexed
@@ -2188,6 +2196,14 @@ example:
     unbounded. If the optional max value is omitted then max is set to the
     value of min. If the attribute is not present, no assumptions are made
     about the range of vscale.
+``"min-legal-vector-width"="<size>"``
+    This attribute indicates the minimum legal vector width required by the
+    calling conversion. It is the maximum width of vector arguments and
+    returnings in the function and functions called by this function. Because
+    all the vectors are supposed to be legal type for compatibility.
+    Backends are free to ignore the attribute if they don't need to support
+    different maximum legal vector types or such information can be inferred by
+    other attributes.
 
 Call Site Attributes
 ----------------------
@@ -23294,8 +23310,8 @@ The first argument to the '``llvm.is.fpclass``' intrinsic must be
 :ref:`floating-point <t_floating>` or :ref:`vector <t_vector>`
 of floating-point values.
 
-The second argument specifies, which tests to perform. It is an integer value,
-each bit in which specifies floating-point class:
+The second argument specifies, which tests to perform. It must be a compile-time
+integer constant, each bit in which specifies floating-point class:
 
 +-------+----------------------+
 | Bit # | floating-point class |
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 869b144..231d3bb 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -29,6 +29,7 @@ class Loop;
 class PredicatedScalarEvolution;
 class ScalarEvolution;
 class SCEV;
+class StoreInst;
 
 /// These are the kinds of recurrences that we support.
 enum class RecurKind {
@@ -69,14 +70,14 @@ class RecurrenceDescriptor {
 public:
   RecurrenceDescriptor() = default;
 
-  RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K,
-                       FastMathFlags FMF, Instruction *ExactFP, Type *RT,
-                       bool Signed, bool Ordered,
+  RecurrenceDescriptor(Value *Start, Instruction *Exit, StoreInst *Store,
+                       RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
+                       Type *RT, bool Signed, bool Ordered,
                        SmallPtrSetImpl<Instruction *> &CI,
                        unsigned MinWidthCastToRecurTy)
-      : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
-        ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed),
-        IsOrdered(Ordered),
+      : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
+        Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
+        IsSigned(Signed), IsOrdered(Ordered),
         MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
     CastInsts.insert(CI.begin(), CI.end());
   }
@@ -163,22 +164,21 @@ public:
   /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
   /// non-null, the minimal bit width needed to compute the reduction will be
   /// computed.
-  static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop,
-                              FastMathFlags FuncFMF,
-                              RecurrenceDescriptor &RedDes,
-                              DemandedBits *DB = nullptr,
-                              AssumptionCache *AC = nullptr,
-                              DominatorTree *DT = nullptr);
+  static bool
+  AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop,
+                  FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes,
+                  DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr,
+                  DominatorTree *DT = nullptr, ScalarEvolution *SE = nullptr);
 
   /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor
   /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are
   /// non-null, the minimal bit width needed to compute the reduction will be
-  /// computed.
-  static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
-                             RecurrenceDescriptor &RedDes,
-                             DemandedBits *DB = nullptr,
-                             AssumptionCache *AC = nullptr,
-                             DominatorTree *DT = nullptr);
+  /// computed. If \p SE is non-null, store instructions to loop invariant
+  /// addresses are processed.
+  static bool
+  isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes,
+                 DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr,
+                 DominatorTree *DT = nullptr, ScalarEvolution *SE = nullptr);
 
   /// Returns true if Phi is a first-order recurrence. A first-order recurrence
   /// is a non-reduction recurrence relation in which the value of the
@@ -270,6 +270,11 @@ public:
            cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd;
   }
 
+  /// Reductions may store temporary or final result to an invariant address.
+  /// If there is such a store in the loop then, after successfull run of
+  /// AddReductionVar method, this field will be assigned the last met store.
+  StoreInst *IntermediateStore = nullptr;
+
 private:
   // The starting value of the recurrence.
   // It does not have to be zero!
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 3724fd4..9a9a984 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -575,6 +575,11 @@ public:
     return HasDependenceInvolvingLoopInvariantAddress;
   }
 
+  /// Return the list of stores to invariant addresses.
+  const ArrayRef<StoreInst *> getStoresToInvariantAddresses() const {
+    return StoresToInvariantAddresses;
+  }
+
   /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
   /// them to a more usable form.  All SCEV expressions during the analysis
   /// should be re-written (and therefore simplified) according to PSE.
@@ -634,6 +639,9 @@ private:
   /// Indicator that there are non vectorizable stores to a uniform address.
   bool HasDependenceInvolvingLoopInvariantAddress = false;
 
+  /// List of stores to invariant addresses.
+  SmallVector<StoreInst *> StoresToInvariantAddresses;
+
   /// The diagnostics report generated for the analysis.  E.g. why we
   /// couldn't analyze the loop.
   std::unique_ptr<OptimizationRemarkAnalysis> Report;
diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
index a724a86..ebfd5bb 100644
--- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -111,6 +111,13 @@ private:
   /// smaller than the cache line size \p CLS.
   bool isConsecutive(const Loop &L, unsigned CLS) const;
 
+  /// Retrieve the index of the subscript corresponding to the given loop \p
+  /// L. Return a zero-based positive index if the subscript index is
+  /// succesfully located and a negative value otherwise. For example given the
+  /// indexed reference 'A[i][2j+1][3k+2]', the call
+  /// 'getSubscriptIndex(loop-k)' would return value 2.
+  int getSubscriptIndex(const Loop &L) const;
+
   /// Return the coefficient used in the rightmost dimension.
   const SCEV *getLastCoefficient() const;
 
@@ -243,9 +250,10 @@ private:
 
   /// Sort the LoopCosts vector by decreasing cache cost.
   void sortLoopCosts() {
-    sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
-      return A.second > B.second;
-    });
+    stable_sort(LoopCosts,
+                [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
+                  return A.second > B.second;
+                });
   }
 
 private:
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 98d7ad8..7bfda0124d 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -280,6 +280,13 @@ public:
     return B == OverrideAsUnavailable;
   }
 
+  /// Return true if the function type FTy is valid for the library function
+  /// F, regardless of whether the function is available.
+  bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F,
+                              const Module &M) const {
+    return Impl->isValidProtoForLibFunc(FTy, F, M);
+  }
+
   /// Searches for a particular function name.
   ///
   /// If it is one of the known library functions, return true and set F to the
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 9111b74..df0a249 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -986,8 +986,6 @@ public:
     }
 
     Type *Ty = U->getType();
-    Type *OpTy =
-      U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr;
     unsigned Opcode = Operator::getOpcode(U);
     auto *I = dyn_cast<Instruction>(U);
     switch (Opcode) {
@@ -1059,9 +1057,11 @@ public:
     case Instruction::FPExt:
     case Instruction::SExt:
     case Instruction::ZExt:
-    case Instruction::AddrSpaceCast:
+    case Instruction::AddrSpaceCast: {
+      Type *OpTy = U->getOperand(0)->getType();
       return TargetTTI->getCastInstrCost(
           Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
+    }
     case Instruction::Store: {
       auto *SI = cast<StoreInst>(U);
       Type *ValTy = U->getOperand(0)->getType();
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index d6cafd5..df460cb 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -309,16 +309,16 @@ inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
 /// Identify if the intrinsic is trivially vectorizable.
 /// This method returns true if the intrinsic's argument types are all scalars
 /// for the scalar form of the intrinsic and all vectors (or scalars handled by
-/// hasVectorIntrinsicScalarOpd) for the vector form of the intrinsic.
+/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
 bool isTriviallyVectorizable(Intrinsic::ID ID);
 
 /// Identifies if the vector form of the intrinsic has a scalar operand.
-bool hasVectorIntrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
+bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
+                                        unsigned ScalarOpdIdx);
 
-/// Identifies if the vector form of the intrinsic has a scalar operand that has
+/// Identifies if the vector form of the intrinsic has a operand that has
 /// an overloaded type.
-bool hasVectorIntrinsicOverloadedScalarOpd(Intrinsic::ID ID,
-                                           unsigned ScalarOpdIdx);
+bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx);
 
 /// Returns intrinsic ID for call.
 /// For the input call instruction it finds mapping intrinsic and returns
diff --git a/llvm/include/llvm/BinaryFormat/DXContainer.h b/llvm/include/llvm/BinaryFormat/DXContainer.h
new file mode 100644
index 0000000..34660c8
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/DXContainer.h
@@ -0,0 +1,91 @@
+//===-- llvm/BinaryFormat/DXContainer.h - The DXBC file format --*- C++/-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines manifest constants for the DXContainer object file format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_DXCONTAINER_H
+#define LLVM_BINARYFORMAT_DXCONTAINER_H
+
+#include "llvm/Support/SwapByteOrder.h"
+
+#include <stdint.h>
+
+namespace llvm {
+
+// The DXContainer file format is arranged as a header and "parts". Semantically
+// parts are similar to sections in other object file formats. The File format
+// structure is roughly:
+
+// ┌────────────────────────────────┐
+// │             Header             │
+// ├────────────────────────────────┤
+// │              Part              │
+// ├────────────────────────────────┤
+// │              Part              │
+// ├────────────────────────────────┤
+// │              ...               │
+// └────────────────────────────────┘
+
+namespace dxbc {
+
+struct Hash {
+  uint8_t Digest[16];
+};
+
+enum class HashFlags : uint32_t {
+  None = 0,           // No flags defined.
+  IncludesSource = 1, // This flag indicates that the shader hash was computed
+                      // taking into account source information (-Zss)
+};
+
+struct ShaderHash {
+  uint32_t Flags; // DxilShaderHashFlags
+  uint8_t Digest[16];
+
+  void byteSwap() { sys::swapByteOrder(Flags); }
+};
+
+struct ContainerVersion {
+  uint16_t Major;
+  uint16_t Minor;
+
+  void byteSwap() {
+    sys::swapByteOrder(Major);
+    sys::swapByteOrder(Minor);
+  }
+};
+
+struct Header {
+  uint8_t Magic[4]; // "DXBC"
+  Hash FileHash;
+  ContainerVersion Version;
+  uint32_t FileSize;
+  uint32_t PartCount;
+
+  void byteSwap() {
+    Version.byteSwap();
+    sys::swapByteOrder(FileSize);
+    sys::swapByteOrder(PartCount);
+  }
+  // Structure is followed by part offsets: uint32_t PartOffset[PartCount];
+  // The offset is to a PartHeader, which is followed by the Part Data.
+};
+
+/// Use this type to describe the size and type of a DXIL container part.
+struct PartHeader {
+  uint8_t Name[4];
+  uint32_t Size;
+  // Structure is followed directly by part data: uint8_t PartData[PartSize].
+};
+
+} // namespace dxbc
+} // namespace llvm
+
+#endif // LLVM_BINARYFORMAT_DXCONTAINER_H
diff --git a/llvm/include/llvm/BinaryFormat/Magic.h b/llvm/include/llvm/BinaryFormat/Magic.h
index e91f0fd..b6bc4e9 100644
--- a/llvm/include/llvm/BinaryFormat/Magic.h
+++ b/llvm/include/llvm/BinaryFormat/Magic.h
@@ -52,6 +52,7 @@ struct file_magic {
     pdb,                 ///< Windows PDB debug info file
     tapi_file,           ///< Text-based Dynamic Library Stub file
     cuda_fatbinary,      ///< CUDA Fatbinary object file
+    dxcontainer_object,  ///< DirectX container file
   };
 
   bool is_object() const { return V != unknown; }
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 23305b5..e4dd862 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -204,6 +204,12 @@ public:
   /// to an LLVM basic block.
   const BasicBlock *getBasicBlock() const { return BB; }
 
+  /// Remove the reference to the underlying IR BasicBlock. This is for
+  /// reduction tools and should generally not be used.
+  void clearBasicBlock() {
+    BB = nullptr;
+  }
+
   /// Return the name of the corresponding LLVM basic block, or an empty string.
   StringRef getName() const;
 
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index dfedf14..edf6e51b 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -505,6 +505,14 @@ public:
     return Objects[ObjectIdx+NumFixedObjects].Alloca;
   }
 
+  /// Remove the underlying Alloca of the specified stack object if it
+  /// exists. This generally should not be used and is for reduction tooling.
+  void clearObjectAllocation(int ObjectIdx) {
+    assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    Objects[ObjectIdx + NumFixedObjects].Alloca = nullptr;
+  }
+
   /// Return the assigned stack offset of the specified object
   /// from the incoming stack pointer.
   int64_t getObjectOffset(int ObjectIdx) const {
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 6363d3e..55873c2 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -531,8 +531,13 @@ public:
   /// the copied value; or for parameters, creates a DBG_PHI on entry.
   /// May insert instructions into the entry block!
   /// \p MI The copy-like instruction to salvage.
+  /// \p DbgPHICache A container to cache already-solved COPYs.
   /// \returns An instruction/operand pair identifying the defining value.
-  DebugInstrOperandPair salvageCopySSA(MachineInstr &MI);
+  DebugInstrOperandPair
+  salvageCopySSA(MachineInstr &MI,
+                 DenseMap<Register, DebugInstrOperandPair> &DbgPHICache);
+
+  DebugInstrOperandPair salvageCopySSAImpl(MachineInstr &MI);
 
   /// Finalise any partially emitted debug instructions. These are DBG_INSTR_REF
   /// instructions where we only knew the vreg of the value they use, not the
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 53f444a..5e9a79a 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -4447,7 +4447,11 @@ Node *AbstractManglingParser<Derived, Alloc>::parseFoldExpr() {
   ++First;
 
   const auto *Op = parseOperatorEncoding();
-  if (!Op || Op->getKind() != OperatorInfo::Binary)
+  if (!Op)
+    return nullptr;
+  if (!(Op->getKind() == OperatorInfo::Binary
+        || (Op->getKind() == OperatorInfo::Member
+            && Op->getName().back() == '*')))
     return nullptr;
 
   Node *Pack = getDerived().parseExpr();
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index e5a1dd3..9575f6d 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -163,6 +163,25 @@ def OMPC_MemoryOrder : Clause<"memory_order"> {
   ];
 }
 
+def OMP_CANCELLATION_CONSTRUCT_Parallel : ClauseVal<"parallel", 1, 1> {}
+def OMP_CANCELLATION_CONSTRUCT_Loop : ClauseVal<"loop", 2, 1> {}
+def OMP_CANCELLATION_CONSTRUCT_Sections : ClauseVal<"sections", 3, 1> {}
+def OMP_CANCELLATION_CONSTRUCT_Taskgroup : ClauseVal<"taskgroup", 4, 1> {}
+def OMP_CANCELLATION_CONSTRUCT_None : ClauseVal<"none", 5, 0> {
+  let isDefault = 1;
+}
+
+def OMPC_CancellationConstructType : Clause<"cancellation_construct_type"> {
+  let enumClauseValue = "CancellationConstructType";
+  let allowedClauseValues = [
+    OMP_CANCELLATION_CONSTRUCT_Parallel,
+    OMP_CANCELLATION_CONSTRUCT_Loop,
+    OMP_CANCELLATION_CONSTRUCT_Sections,
+    OMP_CANCELLATION_CONSTRUCT_Taskgroup,
+    OMP_CANCELLATION_CONSTRUCT_None
+  ];
+}
+
 def OMPC_Ordered : Clause<"ordered"> {
   let clangClass = "OMPOrderedClause";
   let flangClass = "ScalarIntConstantExpr";
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index cc2b16b..5663f7a 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -1252,6 +1252,9 @@ void mergeAttributesForInlining(Function &Caller, const Function &Callee);
 /// \param [in] ToMerge - The function to merge attributes from.
 void mergeAttributesForOutlining(Function &Base, const Function &ToMerge);
 
+/// Update min-legal-vector-width if it is in Attribute and less than Width.
+void updateMinLegalVectorWidthAttr(Function &Fn, uint64_t Width);
+
 } // end namespace AttributeFuncs
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index fb88491..6c65540 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -1160,6 +1160,11 @@ public:
                     Type *Ty     ///< The type to trunc or bitcast C to
   );
 
+  /// Create either an sext, trunc or nothing, depending on whether Ty is
+  /// wider, narrower or the same as C->getType(). This only works with
+  /// integer or vector of integer types.
+  static Constant *getSExtOrTrunc(Constant *C, Type *Ty);
+
   /// Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant
   /// expression.
   static Constant *
diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
index 6cad37f..ddf8161 100644
--- a/llvm/include/llvm/IR/Module.h
+++ b/llvm/include/llvm/IR/Module.h
@@ -363,6 +363,8 @@ public:
   /// In all cases, the returned value is a FunctionCallee wrapper around the
   /// 'FunctionType *T' passed in, as well as a 'Value*' either of the Function or
   /// the bitcast to the function.
+  ///
+  /// Note: For library calls getOrInsertLibFunc() should be used instead.
   FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T,
                                      AttributeList AttributeList);
 
diff --git a/llvm/include/llvm/Object/DXContainer.h b/llvm/include/llvm/Object/DXContainer.h
new file mode 100644
index 0000000..69a5e28
--- /dev/null
+++ b/llvm/include/llvm/Object/DXContainer.h
@@ -0,0 +1,44 @@
+//===- DXContainer.h - DXContainer file implementation ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the DXContainerFile class, which implements the ObjectFile
+// interface for DXContainer files.
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_DXCONTAINER_H
+#define LLVM_OBJECT_DXCONTAINER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBufferRef.h"
+
+namespace llvm {
+namespace object {
+class DXContainer {
+private:
+  DXContainer(MemoryBufferRef O);
+
+  MemoryBufferRef Data;
+  dxbc::Header Header;
+
+  Error parseHeader();
+
+public:
+  StringRef getData() const { return Data.getBuffer(); }
+  static Expected<DXContainer> create(MemoryBufferRef Object);
+
+  const dxbc::Header &getHeader() const { return Header; }
+};
+
+} // namespace object
+} // namespace llvm
+
+#endif // LLVM_OBJECT_DXCONTAINERFILE_H
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index 44b73fa..bae6fb2 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -291,6 +291,9 @@ AARCH64_CPU_NAME("a64fx", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
                  (AArch64::AEK_FP16 | AArch64::AEK_SVE))
 AARCH64_CPU_NAME("carmel", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
                  AArch64::AEK_FP16)
+AARCH64_CPU_NAME("ampere1", ARMV8_6A, FK_CRYPTO_NEON_FP_ARMV8, false,
+                 (AArch64::AEK_FP16 | AArch64::AEK_MTE | AArch64::AEK_SB |
+                  AArch64::AEK_SSBS))
 // Invalid CPU
 AARCH64_CPU_NAME("invalid", INVALID, FK_INVALID, true, AArch64::AEK_INVALID)
 #undef AARCH64_CPU_NAME
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 3765aca..6461164 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -1996,12 +1996,15 @@ void TokenizeGNUCommandLine(StringRef Source, StringSaver &Saver,
                             SmallVectorImpl<const char *> &NewArgv,
                             bool MarkEOLs = false);
 
-/// Tokenizes a Windows command line which may contain quotes and escaped
-/// quotes.
+/// Tokenizes a string of Windows command line arguments, which may contain
+/// quotes and escaped quotes.
 ///
 /// See MSDN docs for CommandLineToArgvW for information on the quoting rules.
 /// http://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
 ///
+/// For handling a full Windows command line including the executable name at
+/// the start, see TokenizeWindowsCommandLineFull below.
+///
 /// \param [in] Source The string to be split on whitespace with quotes.
 /// \param [in] Saver Delegates back to the caller for saving parsed strings.
 /// \param [in] MarkEOLs true if tokenizing a response file and you want end of
@@ -2018,6 +2021,23 @@ void TokenizeWindowsCommandLine(StringRef Source, StringSaver &Saver,
 void TokenizeWindowsCommandLineNoCopy(StringRef Source, StringSaver &Saver,
                                       SmallVectorImpl<StringRef> &NewArgv);
 
+/// Tokenizes a Windows full command line, including command name at the start.
+///
+/// This uses the same syntax rules as TokenizeWindowsCommandLine for all but
+/// the first token. But the first token is expected to be parsed as the
+/// executable file name in the way CreateProcess would do it, rather than the
+/// way the C library startup code would do it: CreateProcess does not consider
+/// that \ is ever an escape character (because " is not a valid filename char,
+/// hence there's never a need to escape it to be used literally).
+///
+/// Parameters are the same as for TokenizeWindowsCommandLine. In particular,
+/// if you set MarkEOLs = true, then the first word of every line will be
+/// parsed using the special rules for command names, making this function
+/// suitable for parsing a file full of commands to execute.
+void TokenizeWindowsCommandLineFull(StringRef Source, StringSaver &Saver,
+                                    SmallVectorImpl<const char *> &NewArgv,
+                                    bool MarkEOLs = false);
+
 /// String tokenization function type.  Should be compatible with either
 /// Windows or Unix command line tokenizers.
 using TokenizerCallback = void (*)(StringRef Source, StringSaver &Saver,
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 87d33b9..d993d73 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -22,23 +22,63 @@ namespace llvm {
   class IRBuilderBase;
 
   /// Analyze the name and prototype of the given function and set any
-  /// applicable attributes.
+  /// applicable attributes. Note that this merely helps optimizations on an
+  /// already existing function but does not consider mandatory attributes.
+  ///
   /// If the library function is unavailable, this doesn't modify it.
   ///
   /// Returns true if any attributes were set and false otherwise.
-  bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI);
-  bool inferLibFuncAttributes(Module *M, StringRef Name, const TargetLibraryInfo &TLI);
+  bool inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name,
+                                     const TargetLibraryInfo &TLI);
+  bool inferNonMandatoryLibFuncAttrs(Function &F, const TargetLibraryInfo &TLI);
+
+  /// Calls getOrInsertFunction() and then makes sure to add mandatory
+  /// argument attributes.
+  FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+                                    LibFunc TheLibFunc, FunctionType *T,
+                                    AttributeList AttributeList);
+  FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+                                    LibFunc TheLibFunc, FunctionType *T);
+  template <typename... ArgsTy>
+  FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+                               LibFunc TheLibFunc, AttributeList AttributeList,
+                               Type *RetTy, ArgsTy... Args) {
+    SmallVector<Type*, sizeof...(ArgsTy)> ArgTys{Args...};
+    return getOrInsertLibFunc(M, TLI, TheLibFunc,
+                              FunctionType::get(RetTy, ArgTys, false),
+                              AttributeList);
+  }
+  /// Same as above, but without the attributes.
+  template <typename... ArgsTy>
+  FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+                             LibFunc TheLibFunc, Type *RetTy, ArgsTy... Args) {
+    return getOrInsertLibFunc(M, TLI, TheLibFunc, AttributeList{}, RetTy,
+                              Args...);
+  }
+  // Avoid an incorrect ordering that'd otherwise compile incorrectly.
+  template <typename... ArgsTy>
+  FunctionCallee
+  getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+                     LibFunc TheLibFunc, AttributeList AttributeList,
+                     FunctionType *Invalid, ArgsTy... Args) = delete;
+
+  /// Check whether the library function is available on target and also that
+  /// it in the current Module is a Function with the right type.
+  bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
+                          LibFunc TheLibFunc);
+  bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
+                          StringRef Name);
 
   /// Check whether the overloaded floating point function
   /// corresponding to \a Ty is available.
-  bool hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+  bool hasFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty,
                   LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn);
 
   /// Get the name of the overloaded floating point function
-  /// corresponding to \a Ty.
-  StringRef getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty,
-                           LibFunc DoubleFn, LibFunc FloatFn,
-                           LibFunc LongDoubleFn);
+  /// corresponding to \a Ty. Return the LibFunc in \a TheLibFunc.
+  StringRef getFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty,
+                       LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn,
+                       LibFunc &TheLibFunc);
 
   /// Return V if it is an i8*, otherwise cast it to i8*.
   Value *castToCStr(Value *V, IRBuilderBase &B);
@@ -148,7 +188,8 @@ namespace llvm {
   /// function is known to take a single of type matching 'Op' and returns one
   /// value with the same type. If 'Op' is a long double, 'l' is added as the
   /// suffix of name, if 'Op' is a float, we add a 'f' suffix.
-  Value *emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilderBase &B,
+  Value *emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
+                              StringRef Name, IRBuilderBase &B,
                               const AttributeList &Attrs);
 
   /// Emit a call to the unary function DoubleFn, FloatFn or LongDoubleFn,
@@ -162,8 +203,10 @@ namespace llvm {
   /// function is known to take type matching 'Op1' and 'Op2' and return one
   /// value with the same type. If 'Op1/Op2' are long double, 'l' is added as
   /// the suffix of name, if 'Op1/Op2' are float, we add a 'f' suffix.
-  Value *emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
-                               IRBuilderBase &B, const AttributeList &Attrs);
+  Value *emitBinaryFloatFnCall(Value *Op1, Value *Op2,
+                               const TargetLibraryInfo *TLI,
+                               StringRef Name, IRBuilderBase &B,
+                               const AttributeList &Attrs);
 
   /// Emit a call to the binary function DoubleFn, FloatFn or LongDoubleFn,
   /// depending of the type of Op1.
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 90a208a..79a44b6 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -235,7 +235,7 @@ private:
 
   /// hasFloatVersion - Checks if there is a float version of the specified
   /// function by checking for an existing function with name FuncName + f
-  bool hasFloatVersion(StringRef FuncName);
+  bool hasFloatVersion(const Module *M, StringRef FuncName);
 
   /// Shared code to optimize strlen+wcslen and strnlen+wcsnlen.
   Value *optimizeStringLength(CallInst *CI, IRBuilderBase &B, unsigned CharSize,
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index d3dfd39..830fd25 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -308,6 +308,14 @@ public:
   /// Returns the widest induction type.
   Type *getWidestInductionType() { return WidestIndTy; }
 
+  /// Returns True if given store is a final invariant store of one of the
+  /// reductions found in the loop.
+  bool isInvariantStoreOfReduction(StoreInst *SI);
+
+  /// Returns True if given address is invariant and is used to store recurrent
+  /// expression
+  bool isInvariantAddressOfReduction(Value *V);
+
   /// Returns True if V is a Phi node of an induction variable in this loop.
   bool isInductionPhi(const Value *V) const;
 
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 299ea33..b3f5b12 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -866,21 +866,6 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
 
   Type *IntIdxTy = DL.getIndexType(Ptr->getType());
 
-  // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
-  // "inttoptr (sub (ptrtoint Ptr), V)"
-  if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
-    auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
-    assert((!CE || CE->getType() == IntIdxTy) &&
-           "CastGEPIndices didn't canonicalize index types!");
-    if (CE && CE->getOpcode() == Instruction::Sub &&
-        CE->getOperand(0)->isNullValue()) {
-      Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
-      Res = ConstantExpr::getSub(Res, CE->getOperand(1));
-      Res = ConstantExpr::getIntToPtr(Res, ResTy);
-      return ConstantFoldConstant(Res, DL, TLI);
-    }
-  }
-
   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
     if (!isa<ConstantInt>(Ops[i]))
       return nullptr;
@@ -1336,6 +1321,19 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
             DL, BaseOffset, /*AllowNonInbounds=*/true));
         if (Base->isNullValue()) {
           FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
+        } else {
+          // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V
+          if (GEP->getNumIndices() == 1 &&
+              GEP->getSourceElementType()->isIntegerTy(8)) {
+            auto *Ptr = cast<Constant>(GEP->getPointerOperand());
+            auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1));
+            Type *IntIdxTy = DL.getIndexType(Ptr->getType());
+            if (Sub && Sub->getType() == IntIdxTy &&
+                Sub->getOpcode() == Instruction::Sub &&
+                Sub->getOperand(0)->isNullValue())
+              FoldedValue = ConstantExpr::getSub(
+                  ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1));
+          }
         }
       }
       if (FoldedValue) {
@@ -3038,7 +3036,7 @@ static Constant *ConstantFoldFixedVectorCall(
     // Gather a column of constants.
     for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
       // Some intrinsics use a scalar type for certain arguments.
-      if (hasVectorIntrinsicScalarOpd(IntrinsicID, J)) {
+      if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J)) {
         Lane[J] = Operands[J];
         continue;
       }
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index e03cf6c..e4d706a 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -227,12 +227,10 @@ static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
   return true;
 }
 
-bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
-                                           Loop *TheLoop, FastMathFlags FuncFMF,
-                                           RecurrenceDescriptor &RedDes,
-                                           DemandedBits *DB,
-                                           AssumptionCache *AC,
-                                           DominatorTree *DT) {
+bool RecurrenceDescriptor::AddReductionVar(
+    PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,
+    RecurrenceDescriptor &RedDes, DemandedBits *DB, AssumptionCache *AC,
+    DominatorTree *DT, ScalarEvolution *SE) {
   if (Phi->getNumIncomingValues() != 2)
     return false;
 
@@ -249,6 +247,12 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
   // This includes users of the reduction, variables (which form a cycle
   // which ends in the phi node).
   Instruction *ExitInstruction = nullptr;
+
+  // Variable to keep last visited store instruction. By the end of the
+  // algorithm this variable will be either empty or having intermediate
+  // reduction value stored in invariant address.
+  StoreInst *IntermediateStore = nullptr;
+
   // Indicates that we found a reduction operation in our scan.
   bool FoundReduxOp = false;
 
@@ -314,6 +318,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
   //  - By instructions outside of the loop (safe).
   //      * One value may have several outside users, but all outside
   //        uses must be of the same value.
+  //  - By store instructions with a loop invariant address (safe with
+  //    the following restrictions):
+  //      * If there are several stores, all must have the same address.
+  //      * Final value should be stored in that loop invariant address.
   //  - By an instruction that is not part of the reduction (not safe).
   //    This is either:
   //      * An instruction type other than PHI or the reduction operation.
@@ -321,6 +329,43 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
   while (!Worklist.empty()) {
     Instruction *Cur = Worklist.pop_back_val();
 
+    // Store instructions are allowed iff it is the store of the reduction
+    // value to the same loop invariant memory location.
+    if (auto *SI = dyn_cast<StoreInst>(Cur)) {
+      if (!SE) {
+        LLVM_DEBUG(dbgs() << "Store instructions are not processed without "
+                          << "Scalar Evolution Analysis\n");
+        return false;
+      }
+
+      const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand());
+      // Check it is the same address as previous stores
+      if (IntermediateStore) {
+        const SCEV *OtherScev =
+            SE->getSCEV(IntermediateStore->getPointerOperand());
+
+        if (OtherScev != PtrScev) {
+          LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "
+                            << "inside the loop: " << *SI->getPointerOperand()
+                            << " and "
+                            << *IntermediateStore->getPointerOperand() << '\n');
+          return false;
+        }
+      }
+
+      // Check the pointer is loop invariant
+      if (!SE->isLoopInvariant(PtrScev, TheLoop)) {
+        LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "
+                          << "inside the loop: " << *SI->getPointerOperand()
+                          << '\n');
+        return false;
+      }
+
+      // IntermediateStore is always the last store in the loop.
+      IntermediateStore = SI;
+      continue;
+    }
+
     // No Users.
     // If the instruction has no users then this is a broken chain and can't be
     // a reduction variable.
@@ -443,10 +488,17 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
       // reductions which are represented as a cmp followed by a select.
       InstDesc IgnoredVal(false, nullptr);
       if (VisitedInsts.insert(UI).second) {
-        if (isa<PHINode>(UI))
+        if (isa<PHINode>(UI)) {
           PHIs.push_back(UI);
-        else
+        } else {
+          StoreInst *SI = dyn_cast<StoreInst>(UI);
+          if (SI && SI->getPointerOperand() == Cur) {
+            // Reduction variable chain can only be stored somewhere but it
+            // can't be used as an address.
+            return false;
+          }
           NonPHIs.push_back(UI);
+        }
       } else if (!isa<PHINode>(UI) &&
                  ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                    !isa<SelectInst>(UI)) ||
@@ -474,6 +526,32 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
   if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
     return false;
 
+  if (IntermediateStore) {
+    // Check that stored value goes to the phi node again. This way we make sure
+    // that the value stored in IntermediateStore is indeed the final reduction
+    // value.
+    if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) {
+      LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "
+                        << *IntermediateStore << '\n');
+      return false;
+    }
+
+    // If there is an exit instruction it's value should be stored in
+    // IntermediateStore
+    if (ExitInstruction &&
+        IntermediateStore->getValueOperand() != ExitInstruction) {
+      LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "
+                           "store last calculated value of the reduction: "
+                        << *IntermediateStore << '\n');
+      return false;
+    }
+
+    // If all uses are inside the loop (intermediate stores), then the
+    // reduction value after the loop will be the one used in the last store.
+    if (!ExitInstruction)
+      ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand());
+  }
+
   if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
     return false;
 
@@ -535,9 +613,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
   // is saved as part of the RecurrenceDescriptor.
 
   // Save the description of this reduction variable.
-  RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ExactFPMathInst,
-                          RecurrenceType, IsSigned, IsOrdered, CastInsts,
-                          MinWidthCastToRecurrenceType);
+  RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
+                          FMF, ExactFPMathInst, RecurrenceType, IsSigned,
+                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
   RedDes = RD;
 
   return true;
@@ -761,7 +839,8 @@ bool RecurrenceDescriptor::hasMultipleUsesOf(
 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
                                           RecurrenceDescriptor &RedDes,
                                           DemandedBits *DB, AssumptionCache *AC,
-                                          DominatorTree *DT) {
+                                          DominatorTree *DT,
+                                          ScalarEvolution *SE) {
   BasicBlock *Header = TheLoop->getHeader();
   Function &F = *Header->getParent();
   FastMathFlags FMF;
@@ -770,72 +849,85 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
   FMF.setNoSignedZeros(
       F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
 
-  if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
     return true;
   }
   if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC,
-                      DT)) {
+                      DT, SE)) {
     LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."
                       << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
     return true;
   }
   if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC,
-                      DT)) {
+                      DT, SE)) {
     LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
                       << " PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC,
-                      DT)) {
+  if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
     return true;
   }
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index b1773db..d0276df 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1993,9 +1993,12 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
   for (StoreInst *ST : Stores) {
     Value *Ptr = ST->getPointerOperand();
 
-    if (isUniform(Ptr))
+    if (isUniform(Ptr)) {
+      // Record store instructions to loop invariant addresses
+      StoresToInvariantAddresses.push_back(ST);
       HasDependenceInvolvingLoopInvariantAddress |=
           !UniformStores.insert(Ptr).second;
+    }
 
     // If we did *not* see this pointer before, insert it to  the read-write
     // list. At this phase it is only a 'write' list.
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index b7806b3..eacd2621 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -103,14 +103,24 @@ static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
   return StepRec == &ElemSize;
 }
 
-/// Compute the trip count for the given loop \p L. Return the SCEV expression
-/// for the trip count or nullptr if it cannot be computed.
-static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) {
+/// Compute the trip count for the given loop \p L or assume a default value if
+/// it is not a compile time constant. Return the SCEV expression for the trip
+/// count.
+static const SCEV *computeTripCount(const Loop &L, const SCEV &ElemSize,
+                                    ScalarEvolution &SE) {
   const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
-  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
-      !isa<SCEVConstant>(BackedgeTakenCount))
-    return nullptr;
-  return SE.getTripCountFromExitCount(BackedgeTakenCount);
+  const SCEV *TripCount = (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
+                           isa<SCEVConstant>(BackedgeTakenCount))
+                              ? SE.getTripCountFromExitCount(BackedgeTakenCount)
+                              : nullptr;
+
+  if (!TripCount) {
+    LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
+               << " could not be computed, using DefaultTripCount\n");
+    TripCount = SE.getConstant(ElemSize.getType(), DefaultTripCount);
+  }
+
+  return TripCount;
 }
 
 //===----------------------------------------------------------------------===//
@@ -274,22 +284,18 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
     return 1;
   }
 
-  const SCEV *TripCount = computeTripCount(L, SE);
-  if (!TripCount) {
-    LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
-                      << " could not be computed, using DefaultTripCount\n");
-    const SCEV *ElemSize = Sizes.back();
-    TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount);
-  }
+  const SCEV *TripCount = computeTripCount(L, *Sizes.back(), SE);
+  assert(TripCount && "Expecting valid TripCount");
   LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
 
-  // If the indexed reference is 'consecutive' the cost is
-  // (TripCount*Stride)/CLS, otherwise the cost is TripCount.
-  const SCEV *RefCost = TripCount;
-
+  const SCEV *RefCost = nullptr;
   if (isConsecutive(L, CLS)) {
+    // If the indexed reference is 'consecutive' the cost is
+    // (TripCount*Stride)/CLS.
     const SCEV *Coeff = getLastCoefficient();
     const SCEV *ElemSize = Sizes.back();
+    assert(Coeff->getType() == ElemSize->getType() &&
+           "Expecting the same type");
     const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
     Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
     const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
@@ -303,10 +309,33 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
     LLVM_DEBUG(dbgs().indent(4)
                << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
                << *RefCost << "\n");
-  } else
+  } else {
+    // If the indexed reference is not 'consecutive' the cost is proportional to
+    // the trip count and the depth of the dimension which the subject loop
+    // subscript is accessing. We try to estimate this by multiplying the cost
+    // by the trip counts of loops corresponding to the inner dimensions. For
+    // example, given the indexed reference 'A[i][j][k]', and assuming the
+    // i-loop is in the innermost position, the cost would be equal to the
+    // iterations of the i-loop multiplied by iterations of the j-loop.
+    RefCost = TripCount;
+
+    int Index = getSubscriptIndex(L);
+    assert(Index >= 0 && "Cound not locate a valid Index");
+
+    for (unsigned I = Index + 1; I < getNumSubscripts() - 1; ++I) {
+      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(I));
+      assert(AR && AR->getLoop() && "Expecting valid loop");
+      const SCEV *TripCount =
+          computeTripCount(*AR->getLoop(), *Sizes.back(), SE);
+      Type *WiderType = SE.getWiderType(RefCost->getType(), TripCount->getType());
+      RefCost = SE.getMulExpr(SE.getNoopOrAnyExtend(RefCost, WiderType),
+                              SE.getNoopOrAnyExtend(TripCount, WiderType));
+    }
+
     LLVM_DEBUG(dbgs().indent(4)
-               << "Access is not consecutive: RefCost=TripCount=" << *RefCost
-               << "\n");
+               << "Access is not consecutive: RefCost=" << *RefCost << "\n");
+  }
+  assert(RefCost && "Expecting a valid RefCost");
 
   // Attempt to fold RefCost into a constant.
   if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
@@ -481,6 +510,16 @@ bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
   return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize);
 }
 
+int IndexedReference::getSubscriptIndex(const Loop &L) const {
+  for (auto Idx : seq<int>(0, getNumSubscripts())) {
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(Idx));
+    if (AR && AR->getLoop() == &L) {
+      return Idx;
+    }
+  }
+  return -1;
+}
+
 const SCEV *IndexedReference::getLastCoefficient() const {
   const SCEV *LastSubscript = getLastSubscript();
   auto *AR = cast<SCEVAddRecExpr>(LastSubscript);
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 75381f5..0144ce4 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -282,6 +282,20 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
         match(LHS, m_c_And(m_Specific(M), m_Value())))
       return true;
   }
+
+  // X op (Y & ~X)
+  if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) ||
+      match(LHS, m_c_And(m_Not(m_Specific(RHS)), m_Value())))
+    return true;
+
+  // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
+  // for constant Y.
+  Value *Y;
+  if (match(RHS,
+            m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) ||
+      match(LHS, m_c_Xor(m_c_And(m_Specific(RHS), m_Value(Y)), m_Deferred(Y))))
+    return true;
+
   // Look for: (A & B) op ~(A | B)
   {
     Value *A, *B;
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 5f8fa13..a53b216 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -40,7 +40,7 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor(
 /// Return true if all of the intrinsic's arguments and return type are scalars
 /// for the scalar form of the intrinsic, and vectors for the vector form of the
 /// intrinsic (except operands that are marked as always being scalar by
-/// hasVectorIntrinsicScalarOpd).
+/// isVectorIntrinsicWithScalarOpAtArg).
 bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   switch (ID) {
   case Intrinsic::abs:   // Begin integer bit-manipulation.
@@ -89,6 +89,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::fmuladd:
   case Intrinsic::powi:
   case Intrinsic::canonicalize:
+  case Intrinsic::fptosi_sat:
+  case Intrinsic::fptoui_sat:
     return true;
   default:
     return false;
@@ -96,8 +98,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
 }
 
 /// Identifies if the vector form of the intrinsic has a scalar operand.
-bool llvm::hasVectorIntrinsicScalarOpd(Intrinsic::ID ID,
-                                       unsigned ScalarOpdIdx) {
+bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
+                                              unsigned ScalarOpdIdx) {
   switch (ID) {
   case Intrinsic::abs:
   case Intrinsic::ctlz:
@@ -114,11 +116,14 @@ bool llvm::hasVectorIntrinsicScalarOpd(Intrinsic::ID ID,
   }
 }
 
-bool llvm::hasVectorIntrinsicOverloadedScalarOpd(Intrinsic::ID ID,
-                                                 unsigned ScalarOpdIdx) {
+bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
+                                                  unsigned OpdIdx) {
   switch (ID) {
+  case Intrinsic::fptosi_sat:
+  case Intrinsic::fptoui_sat:
+    return OpdIdx == 0;
   case Intrinsic::powi:
-    return (ScalarOpdIdx == 1);
+    return OpdIdx == 1;
   default:
     return false;
   }
diff --git a/llvm/lib/BinaryFormat/Magic.cpp b/llvm/lib/BinaryFormat/Magic.cpp
index 5d999a90..d48adb1 100644
--- a/llvm/lib/BinaryFormat/Magic.cpp
+++ b/llvm/lib/BinaryFormat/Magic.cpp
@@ -225,6 +225,11 @@ file_magic llvm::identify_magic(StringRef Magic) {
     if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
       return file_magic::tapi_file;
     break;
+  
+  case 'D': // DirectX container file - DXBC
+    if (startswith(Magic, "DXBC") && Magic.size() == 4)
+      return file_magic::dxcontainer_object;
+    break;
 
   default:
     break;
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 06830e8..f2b0024 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -1033,7 +1033,32 @@ void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old,
   }
 }
 
-auto MachineFunction::salvageCopySSA(MachineInstr &MI)
+auto MachineFunction::salvageCopySSA(
+    MachineInstr &MI, DenseMap<Register, DebugInstrOperandPair> &DbgPHICache)
+    -> DebugInstrOperandPair {
+  const TargetInstrInfo &TII = *getSubtarget().getInstrInfo();
+
+  // Check whether this copy-like instruction has already been salvaged into
+  // an operand pair.
+  Register Dest;
+  if (auto CopyDstSrc = TII.isCopyInstr(MI)) {
+    Dest = CopyDstSrc->Destination->getReg();
+  } else {
+    assert(MI.isSubregToReg());
+    Dest = MI.getOperand(0).getReg();
+  }
+
+  auto CacheIt = DbgPHICache.find(Dest);
+  if (CacheIt != DbgPHICache.end())
+    return CacheIt->second;
+
+  // Calculate the instruction number to use, or install a DBG_PHI.
+  auto OperandPair = salvageCopySSAImpl(MI);
+  DbgPHICache.insert({Dest, OperandPair});
+  return OperandPair;
+}
+
+auto MachineFunction::salvageCopySSAImpl(MachineInstr &MI)
     -> DebugInstrOperandPair {
   MachineRegisterInfo &MRI = getRegInfo();
   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
@@ -1189,6 +1214,7 @@ void MachineFunction::finalizeDebugInstrRefs() {
     MI.getOperand(1).ChangeToRegister(0, false);
   };
 
+  DenseMap<Register, DebugInstrOperandPair> ArgDbgPHIs;
   for (auto &MBB : *this) {
     for (auto &MI : MBB) {
       if (!MI.isDebugRef() || !MI.getOperand(0).isReg())
@@ -1211,7 +1237,7 @@ void MachineFunction::finalizeDebugInstrRefs() {
       // instruction that defines the source value, see salvageCopySSA docs
       // for why this is important.
       if (DefMI.isCopyLike() || TII->isCopyInstr(DefMI)) {
-        auto Result = salvageCopySSA(DefMI);
+        auto Result = salvageCopySSA(DefMI, ArgDbgPHIs);
         MI.getOperand(0).ChangeToImmediate(Result.first);
         MI.getOperand(1).setImm(Result.second);
       } else {
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 6887347..87b8ac5 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -109,7 +109,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
     auto *ArgType = Arg.value()->getType();
     // Vector calls to intrinsics can still have
     // scalar operands for specific arguments.
-    if (hasVectorIntrinsicScalarOpd(IntrinsicID, Arg.index())) {
+    if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) {
       ScalarTypes.push_back(ArgType);
     } else {
       // The argument in this place should be a vector if
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e139cf6..e483c3a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -519,7 +519,9 @@ namespace {
 
     SDValue XformToShuffleWithZero(SDNode *N);
     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
-                                                    const SDLoc &DL, SDValue N0,
+                                                    const SDLoc &DL,
+                                                    SDNode *N,
+                                                    SDValue N0,
                                                     SDValue N1);
     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                       SDValue N1);
@@ -996,6 +998,7 @@ static bool canSplitIdx(LoadSDNode *LD) {
 
 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                              const SDLoc &DL,
+                                                             SDNode *N,
                                                              SDValue N0,
                                                              SDValue N1) {
   // Currently this only tries to ensure we don't undo the GEP splits done by
@@ -1025,7 +1028,7 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
     return false;
   const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
 
-  for (SDNode *Node : N0->uses()) {
+  for (SDNode *Node : N->uses()) {
     auto LoadStore = dyn_cast<MemSDNode>(Node);
     if (LoadStore) {
       // Is x[offset2] already not a legal addressing mode? If so then
@@ -2447,7 +2450,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
     return NewSel;
 
   // reassociate add
-  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
+  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
       return RADD;
 
@@ -15527,7 +15530,7 @@ static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
   // This means this is also safe for a signed input and unsigned output, since
   // a negative input would lead to undefined behavior.
   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
-  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
+  unsigned OutputSize = (int)VT.getScalarSizeInBits();
   unsigned ActualSize = std::min(InputSize, OutputSize);
   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d667988..90e4b5d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4684,26 +4684,33 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
   return false;
 }
 
+static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) {
+  // Match masked merge pattern (X & ~M) op (Y & M)
+  // Including degenerate case (X & ~M) op M
+  auto MatchNoCommonBitsPattern = [&](SDValue NotM, SDValue Other) {
+    if (isBitwiseNot(NotM, true)) {
+      SDValue NotOperand = NotM->getOperand(0);
+      if (Other == NotOperand)
+        return true;
+      if (Other->getOpcode() == ISD::AND)
+        return NotOperand == Other->getOperand(0) ||
+               NotOperand == Other->getOperand(1);
+    }
+    return false;
+  };
+  if (A->getOpcode() == ISD::AND)
+    return MatchNoCommonBitsPattern(A->getOperand(0), B) ||
+           MatchNoCommonBitsPattern(A->getOperand(1), B);
+  return false;
+}
+
 // FIXME: unify with llvm::haveNoCommonBitsSet.
 bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
   assert(A.getValueType() == B.getValueType() &&
          "Values must have the same type");
-  // Match masked merge pattern (X & ~M) op (Y & M)
-  if (A->getOpcode() == ISD::AND && B->getOpcode() == ISD::AND) {
-    auto MatchNoCommonBitsPattern = [&](SDValue NotM, SDValue And) {
-      if (isBitwiseNot(NotM, true)) {
-        SDValue NotOperand = NotM->getOperand(0);
-        return NotOperand == And->getOperand(0) ||
-               NotOperand == And->getOperand(1);
-      }
-      return false;
-    };
-    if (MatchNoCommonBitsPattern(A->getOperand(0), B) ||
-        MatchNoCommonBitsPattern(A->getOperand(1), B) ||
-        MatchNoCommonBitsPattern(B->getOperand(0), A) ||
-        MatchNoCommonBitsPattern(B->getOperand(1), A))
-      return true;
-  }
+  if (haveNoCommonBitsSetCommutative(A, B) ||
+      haveNoCommonBitsSetCommutative(B, A))
+    return true;
   return KnownBits::haveNoCommonBitsSet(computeKnownBits(A),
                                         computeKnownBits(B));
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9732a17..b209aecf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9044,7 +9044,9 @@ void TargetLowering::expandUADDSUBO(
   if (IsAdd && isOneConstant(RHS)) {
     // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
     // the live range of X. We assume comparing with 0 is cheap.
-    // TODO: This generalizes to (X + C) < C.
+    // The general case (X + C) < C is not necessarily beneficial. Although we
+    // reduce the live range of X, we may introduce the materialization of
+    // constant C.
     SetCC =
         DAG.getSetCC(dl, SetCCType, Result,
                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 3cade80..1aa2d44 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -2029,3 +2029,14 @@ void AttributeFuncs::mergeAttributesForOutlining(Function &Base,
   // that aspect in the merged function.
   mergeFnAttrs(Base, ToMerge);
 }
+
+void AttributeFuncs::updateMinLegalVectorWidthAttr(Function &Fn,
+                                                   uint64_t Width) {
+  Attribute Attr = Fn.getFnAttribute("min-legal-vector-width");
+  if (Attr.isValid()) {
+    uint64_t OldWidth;
+    Attr.getValueAsString().getAsInteger(0, OldWidth);
+    if (Width > OldWidth)
+      Fn.addFnAttr("min-legal-vector-width", llvm::utostr(Width));
+  }
+}
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index 5dcf1ba..c182513 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -2068,6 +2068,17 @@ Constant *ConstantExpr::getTruncOrBitCast(Constant *C, Type *Ty) {
   return getTrunc(C, Ty);
 }
 
+Constant *ConstantExpr::getSExtOrTrunc(Constant *C, Type *Ty) {
+  assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
+         "Can only sign extend/truncate integers!");
+  Type *CTy = C->getType();
+  if (CTy->getScalarSizeInBits() < Ty->getScalarSizeInBits())
+    return getSExt(C, Ty);
+  if (CTy->getScalarSizeInBits() > Ty->getScalarSizeInBits())
+    return getTrunc(C, Ty);
+  return C;
+}
+
 Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) {
   assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast");
   assert((Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy()) &&
diff --git a/llvm/lib/Object/Binary.cpp b/llvm/lib/Object/Binary.cpp
index 67ed44a..1703f76 100644
--- a/llvm/lib/Object/Binary.cpp
+++ b/llvm/lib/Object/Binary.cpp
@@ -84,6 +84,7 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
   case file_magic::unknown:
   case file_magic::cuda_fatbinary:
   case file_magic::coff_cl_gl_object:
+  case file_magic::dxcontainer_object:
     // Unrecognized object file format.
     return errorCodeToError(object_error::invalid_file_type);
   case file_magic::minidump:
diff --git a/llvm/lib/Object/CMakeLists.txt b/llvm/lib/Object/CMakeLists.txt
index 0825210..ba612e3 100644
--- a/llvm/lib/Object/CMakeLists.txt
+++ b/llvm/lib/Object/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMObject
   COFFModuleDefinition.cpp
   COFFObjectFile.cpp
   Decompressor.cpp
+  DXContainer.cpp
   ELF.cpp
   ELFObjectFile.cpp
   Error.cpp
diff --git a/llvm/lib/Object/DXContainer.cpp b/llvm/lib/Object/DXContainer.cpp
new file mode 100644
index 0000000..e1aea562
--- /dev/null
+++ b/llvm/lib/Object/DXContainer.cpp
@@ -0,0 +1,44 @@
+//===- DXContainer.cpp - DXContainer object file implementation -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/DXContainer.h"
+#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/Object/Error.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+static Error parseFailed(const Twine &Msg) {
+  return make_error<GenericBinaryError>(Msg.str(), object_error::parse_failed);
+}
+
+template <typename T>
+static Error readStruct(StringRef Buffer, const char *P, T &Struct) {
+  // Don't read before the beginning or past the end of the file
+  if (P < Buffer.begin() || P + sizeof(T) > Buffer.end())
+    return parseFailed("Reading structure out of file bounds");
+
+  memcpy(&Struct, P, sizeof(T));
+  // DXContainer is always BigEndian
+  if (sys::IsBigEndianHost)
+    Struct.byteSwap();
+  return Error::success();
+}
+
+DXContainer::DXContainer(MemoryBufferRef O) : Data(O) {}
+
+Error DXContainer::parseHeader() {
+  return readStruct(Data.getBuffer(), Data.getBuffer().data(), Header);
+}
+
+Expected<DXContainer> DXContainer::create(MemoryBufferRef Object) {
+  DXContainer Container(Object);
+  if (Error Err = Container.parseHeader())
+    return std::move(Err);
+  return Container;
+}
diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp
index fed6726..609dfae 100644
--- a/llvm/lib/Object/ObjectFile.cpp
+++ b/llvm/lib/Object/ObjectFile.cpp
@@ -147,6 +147,7 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type,
   case file_magic::minidump:
   case file_magic::goff_object:
   case file_magic::cuda_fatbinary:
+  case file_magic::dxcontainer_object:
     return errorCodeToError(object_error::invalid_file_type);
   case file_magic::tapi_file:
     return errorCodeToError(object_error::invalid_file_type);
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index 4c92502..3e5fff9 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -918,21 +918,34 @@ static size_t parseBackslash(StringRef Src, size_t I, SmallString<128> &Token) {
   return I - 1;
 }
 
-// Windows treats whitespace, double quotes, and backslashes specially.
+// Windows treats whitespace, double quotes, and backslashes specially, except
+// when parsing the first token of a full command line, in which case
+// backslashes are not special.
 static bool isWindowsSpecialChar(char C) {
   return isWhitespaceOrNull(C) || C == '\\' || C == '\"';
 }
+static bool isWindowsSpecialCharInCommandName(char C) {
+  return isWhitespaceOrNull(C) || C == '\"';
+}
 
 // Windows tokenization implementation. The implementation is designed to be
 // inlined and specialized for the two user entry points.
-static inline void
-tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
-                               function_ref<void(StringRef)> AddToken,
-                               bool AlwaysCopy, function_ref<void()> MarkEOL) {
+static inline void tokenizeWindowsCommandLineImpl(
+    StringRef Src, StringSaver &Saver, function_ref<void(StringRef)> AddToken,
+    bool AlwaysCopy, function_ref<void()> MarkEOL, bool InitialCommandName) {
   SmallString<128> Token;
 
+  // Sometimes, this function will be handling a full command line including an
+  // executable pathname at the start. In that situation, the initial pathname
+  // needs different handling from the following arguments, because when
+  // CreateProcess or cmd.exe scans the pathname, it doesn't treat \ as
+  // escaping the quote character, whereas when libc scans the rest of the
+  // command line, it does.
+  bool CommandName = InitialCommandName;
+
   // Try to do as much work inside the state machine as possible.
   enum { INIT, UNQUOTED, QUOTED } State = INIT;
+
   for (size_t I = 0, E = Src.size(); I < E; ++I) {
     switch (State) {
     case INIT: {
@@ -947,19 +960,29 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
       if (I >= E)
         break;
       size_t Start = I;
-      while (I < E && !isWindowsSpecialChar(Src[I]))
-        ++I;
+      if (CommandName) {
+        while (I < E && !isWindowsSpecialCharInCommandName(Src[I]))
+          ++I;
+      } else {
+        while (I < E && !isWindowsSpecialChar(Src[I]))
+          ++I;
+      }
       StringRef NormalChars = Src.slice(Start, I);
       if (I >= E || isWhitespaceOrNull(Src[I])) {
         // No special characters: slice out the substring and start the next
         // token. Copy the string if the caller asks us to.
         AddToken(AlwaysCopy ? Saver.save(NormalChars) : NormalChars);
-        if (I < E && Src[I] == '\n')
+        if (I < E && Src[I] == '\n') {
           MarkEOL();
+          CommandName = InitialCommandName;
+        } else {
+          CommandName = false;
+        }
       } else if (Src[I] == '\"') {
         Token += NormalChars;
         State = QUOTED;
       } else if (Src[I] == '\\') {
+        assert(!CommandName && "or else we'd have treated it as a normal char");
         Token += NormalChars;
         I = parseBackslash(Src, I, Token);
         State = UNQUOTED;
@@ -976,12 +999,16 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
         // token.
         AddToken(Saver.save(Token.str()));
         Token.clear();
-        if (Src[I] == '\n')
+        if (Src[I] == '\n') {
+          CommandName = InitialCommandName;
           MarkEOL();
+        } else {
+          CommandName = false;
+        }
         State = INIT;
       } else if (Src[I] == '\"') {
         State = QUOTED;
-      } else if (Src[I] == '\\') {
+      } else if (Src[I] == '\\' && !CommandName) {
         I = parseBackslash(Src, I, Token);
       } else {
         Token.push_back(Src[I]);
@@ -999,7 +1026,7 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
           // Otherwise, end the quoted portion and return to the unquoted state.
           State = UNQUOTED;
         }
-      } else if (Src[I] == '\\') {
+      } else if (Src[I] == '\\' && !CommandName) {
         I = parseBackslash(Src, I, Token);
       } else {
         Token.push_back(Src[I]);
@@ -1008,7 +1035,7 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
     }
   }
 
-  if (State == UNQUOTED)
+  if (State != INIT)
     AddToken(Saver.save(Token.str()));
 }
 
@@ -1021,7 +1048,7 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
       NewArgv.push_back(nullptr);
   };
   tokenizeWindowsCommandLineImpl(Src, Saver, AddToken,
-                                 /*AlwaysCopy=*/true, OnEOL);
+                                 /*AlwaysCopy=*/true, OnEOL, false);
 }
 
 void cl::TokenizeWindowsCommandLineNoCopy(StringRef Src, StringSaver &Saver,
@@ -1029,7 +1056,19 @@ void cl::TokenizeWindowsCommandLineNoCopy(StringRef Src, StringSaver &Saver,
   auto AddToken = [&](StringRef Tok) { NewArgv.push_back(Tok); };
   auto OnEOL = []() {};
   tokenizeWindowsCommandLineImpl(Src, Saver, AddToken, /*AlwaysCopy=*/false,
-                                 OnEOL);
+                                 OnEOL, false);
+}
+
+void cl::TokenizeWindowsCommandLineFull(StringRef Src, StringSaver &Saver,
+                                        SmallVectorImpl<const char *> &NewArgv,
+                                        bool MarkEOLs) {
+  auto AddToken = [&](StringRef Tok) { NewArgv.push_back(Tok.data()); };
+  auto OnEOL = [&]() {
+    if (MarkEOLs)
+      NewArgv.push_back(nullptr);
+  };
+  tokenizeWindowsCommandLineImpl(Src, Saver, AddToken,
+                                 /*AlwaysCopy=*/true, OnEOL, true);
 }
 
 void cl::tokenizeConfigFile(StringRef Source, StringSaver &Saver,
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index 98272bb..976599f 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -296,6 +296,12 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
     }
   }
 
+  if (Implementer == "0xc0") { // Ampere Computing
+    return StringSwitch<const char *>(Part)
+        .Case("0xac3", "ampere1")
+        .Default("generic");
+  }
+
   return "generic";
 }
 
diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc
index dfaab16..e415674 100644
--- a/llvm/lib/Support/Windows/Process.inc
+++ b/llvm/lib/Support/Windows/Process.inc
@@ -247,7 +247,7 @@ windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
 
   SmallVector<const char *, 20> TmpArgs;
   StringSaver Saver(Alloc);
-  cl::TokenizeWindowsCommandLine(Cmd, Saver, TmpArgs, /*MarkEOLs=*/false);
+  cl::TokenizeWindowsCommandLineFull(Cmd, Saver, TmpArgs, /*MarkEOLs=*/false);
 
   for (const char *Arg : TmpArgs) {
     EC = WildcardExpand(Arg, Args, Saver);
@@ -255,6 +255,9 @@ windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
       return EC;
   }
 
+  if (Args.size() == 0)
+    return std::make_error_code(std::errc::invalid_argument);
+
   SmallVector<char, MAX_PATH> Arg0(Args[0], Args[0] + strlen(Args[0]));
   SmallVector<char, MAX_PATH> Filename;
   sys::path::remove_filename(Arg0);
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index bd6deb5..2682b9b 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -567,6 +567,7 @@ include "AArch64Schedule.td"
 include "AArch64InstrInfo.td"
 include "AArch64SchedPredicates.td"
 include "AArch64SchedPredExynos.td"
+include "AArch64SchedPredAmpere.td"
 include "AArch64Combine.td"
 
 def AArch64InstrInfo : InstrInfo;
@@ -636,6 +637,7 @@ include "AArch64SchedThunderX2T99.td"
 include "AArch64SchedA64FX.td"
 include "AArch64SchedThunderX3T110.td"
 include "AArch64SchedTSV110.td"
+include "AArch64SchedAmpere1.td"
 
 def TuneA35     : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
                                 "Cortex-A35 ARM processors">;
@@ -956,6 +958,16 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
                                   FeatureFuseAES,
                                   FeaturePostRAScheduler]>;
 
+def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
+                                   "Ampere Computing Ampere-1 processors", [
+                                   FeaturePostRAScheduler,
+                                   FeatureFuseAES,
+                                   FeatureLSLFast,
+                                   FeatureAggressiveFMA,
+                                   FeatureArithmeticBccFusion,
+                                   FeatureCmpBccFusion,
+                                   FeatureFuseAddress,
+                                   FeatureFuseLiterals]>;
 
 def ProcessorFeatures {
   list<SubtargetFeature> A53  = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
@@ -1067,6 +1079,8 @@ def ProcessorFeatures {
   list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
                                    FeatureNEON, FeaturePerfMon, FeatureSPE,
                                    FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
+  list<SubtargetFeature> Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
+                                    FeatureMTE, FeatureSSBS];
 
   // ETE and TRBE are future architecture extensions. We temporarily enable them
   // by default for users targeting generic AArch64. The extensions do not
@@ -1205,6 +1219,10 @@ def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX,
 def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel,
                      [TuneCarmel]>;
 
+// Ampere Computing
+def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1,
+                     [TuneAmpere1]>;
+
 //===----------------------------------------------------------------------===//
 // Assembly parser
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index c367d2d..71911b6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -5092,12 +5092,19 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                    SDValue &OffImm) {
   const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
   const DataLayout &DL = CurDAG->getDataLayout();
+  const MachineFrameInfo &MFI = MF->getFrameInfo();
 
   if (N.getOpcode() == ISD::FrameIndex) {
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
-    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
-    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
-    return true;
+    // We can only encode VL scaled offsets, so only fold in frame indexes
+    // referencing SVE objects.
+    if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
+      return true;
+    }
+
+    return false;
   }
 
   if (MemVT == EVT())
@@ -5124,7 +5131,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
   Base = N.getOperand(0);
   if (Base.getOpcode() == ISD::FrameIndex) {
     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+    // We can only encode VL scaled offsets, so only fold in frame indexes
+    // referencing SVE objects.
+    if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
+      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
   }
 
   OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
diff --git a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
new file mode 100644
index 0000000..32f7299
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
@@ -0,0 +1,1136 @@
+//=- AArch64SchedAmpere1.td - Ampere-1 scheduling def -----*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the Ampere Computing Ampere-1 to
+// support instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+// The Ampere-1 core is an out-of-order micro-architecture.  The front
+// end has branch prediction, with a 10-cycle recovery time from a
+// mispredicted branch.  Instructions coming out of the front end are
+// decoded into internal micro-ops (uops).
+
+def Ampere1Model : SchedMachineModel {
+  let IssueWidth            =   4;  // 4-way decode and dispatch
+  let MicroOpBufferSize     = 174;  // micro-op re-order buffer size
+  let LoadLatency           =   4;  // Optimistic load latency
+  let MispredictPenalty     =  10;  // Branch mispredict penalty
+  let LoopMicroOpBufferSize =  32;  // Instruction queue size
+  let CompleteModel = 1;
+
+  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+                                                    SMEUnsupported.F);
+}
+
+let SchedModel = Ampere1Model in {
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Ampere-1.
+// Ampere-1 has 12 pipelines that 8 independent scheduler (4 integer, 2 FP,
+// and 2 memory) issue into.  The integer and FP schedulers can each issue
+// one uop per cycle, while the memory schedulers can each issue one load
+// and one store address calculation per cycle.
+
+def Ampere1UnitA  : ProcResource<2>;  // integer single-cycle, branch, and flags r/w
+def Ampere1UnitB  : ProcResource<2>;  // integer single-cycle, and complex shifts
+def Ampere1UnitBS : ProcResource<1>;  // integer multi-cycle
+def Ampere1UnitL  : ProcResource<2>;  // load
+def Ampere1UnitS  : ProcResource<2>;  // store address calculation
+def Ampere1UnitX  : ProcResource<1>;  // FP and vector operations, and flag write
+def Ampere1UnitY  : ProcResource<1>;  // FP and vector operations, and crypto
+def Ampere1UnitZ  : ProcResource<1>;  // FP store data and FP-to-integer moves
+
+def Ampere1UnitAB : ProcResGroup<[Ampere1UnitA, Ampere1UnitB]>;
+def Ampere1UnitXY : ProcResGroup<[Ampere1UnitX, Ampere1UnitY]>;
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Ampere-1.
+
+def Ampere1Write_1cyc_1A : SchedWriteRes<[Ampere1UnitA]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_1cyc_2A : SchedWriteRes<[Ampere1UnitA, Ampere1UnitA]> {
+  let Latency = 1;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_1cyc_1B : SchedWriteRes<[Ampere1UnitB]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_1cyc_1AB : SchedWriteRes<[Ampere1UnitAB]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_1cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_1cyc_1S : SchedWriteRes<[Ampere1UnitS]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_1cyc_2S : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS]> {
+  let Latency = 1;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
+  let Latency = 2;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_2cyc_2AB : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitAB]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1B_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitAB]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1B_1A : SchedWriteRes<[Ampere1UnitB, Ampere1UnitA]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1AB_1A : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitA]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1AB_2S : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS,
+                                                             Ampere1UnitS]> {
+  let Latency = 2;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_2cyc_1AB_1S_1Z : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS,
+                                                                Ampere1UnitZ]> {
+  let Latency = 2;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_2cyc_1B_1S : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+  let Latency = 2;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_2cyc_1S_1Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_3cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
+  let Latency = 3;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_3cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+  let Latency = 3;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_3cyc_1B_1S_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS,
+                                                               Ampere1UnitAB]> {
+  let Latency = 2;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_3cyc_1S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ, Ampere1UnitZ]> {
+  let Latency = 2;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_3cyc_2S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
+                                             Ampere1UnitZ, Ampere1UnitZ]> {
+  let Latency = 2;
+  let NumMicroOps = 4;
+}
+
+def Ampere1Write_4cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
+  let Latency = 4;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_4cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
+  let Latency = 4;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_4cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
+  let Latency = 4;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_4cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
+  let Latency = 4;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_4cyc_1Z : SchedWriteRes<[Ampere1UnitZ]> {
+  let Latency = 4;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_4cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_4cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+  let Latency = 4;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_4cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitS, Ampere1UnitZ]> {
+  let Latency = 4;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_4cyc_3S_3Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS, Ampere1UnitS,
+                                             Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> {
+  let Latency = 4;
+  let NumMicroOps = 6;
+}
+
+def Ampere1Write_5cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_5cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_5cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_5cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_5cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_5cyc_1L_1BS : SchedWriteRes<[Ampere1UnitL, Ampere1UnitBS]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_5cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_5cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_5cyc_4S_4Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
+                                             Ampere1UnitS, Ampere1UnitS,
+                                             Ampere1UnitZ, Ampere1UnitZ,
+                                             Ampere1UnitZ, Ampere1UnitZ]> {
+  let Latency = 5;
+  let NumMicroOps = 8;
+}
+
+def Ampere1Write_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+                                                 Ampere1UnitS, Ampere1UnitS,
+                                                 Ampere1UnitZ, Ampere1UnitZ]> {
+  let Latency = 5;
+  let NumMicroOps = 6;
+}
+
+def Ampere1Write_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+                                                 Ampere1UnitS, Ampere1UnitS,
+                                                 Ampere1UnitZ, Ampere1UnitZ]> {
+  let Latency = 6;
+  let NumMicroOps = 6;
+}
+
+def Ampere1Write_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY,
+                                                 Ampere1UnitS, Ampere1UnitS, Ampere1UnitS,
+                                                 Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> {
+  let Latency = 6;
+  let NumMicroOps = 9;
+}
+
+def Ampere1Write_6cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_6cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+  let Latency = 6;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_6cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_6cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 6;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_6cyc_3L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL]> {
+  let Latency = 6;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_6cyc_4L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+                                          Ampere1UnitL, Ampere1UnitL]> {
+  let Latency = 6;
+  let NumMicroOps = 4;
+}
+
+def Ampere1Write_6cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_7cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
+  let Latency = 7;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_7cyc_1BS_1XY : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitXY]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_7cyc_1L_1XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitXY]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_7cyc_2L_2XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+                                              Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 7;
+  let NumMicroOps = 4;
+}
+
+def Ampere1Write_7cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+                                                 Ampere1UnitXY, Ampere1UnitXY,
+                                                 Ampere1UnitS, Ampere1UnitS,
+                                                 Ampere1UnitS, Ampere1UnitS,
+                                                 Ampere1UnitZ, Ampere1UnitZ,
+                                                 Ampere1UnitZ, Ampere1UnitZ]> {
+  let Latency = 7;
+  let NumMicroOps = 12;
+}
+
+def Ampere1Write_8cyc_1BS_1A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_8cyc_1BS_2A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA,
+                                                             Ampere1UnitA]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_8cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_8cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+                                           Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+}
+
+def Ampere1Write_8cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
+                                              Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 8;
+  let NumMicroOps = 6;
+}
+
+def Ampere1Write_8cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+                                              Ampere1UnitL, Ampere1UnitL,
+                                              Ampere1UnitXY, Ampere1UnitXY,
+                                              Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 8;
+  let NumMicroOps = 8;
+}
+
+def Ampere1Write_9cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
+                                              Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 9;
+  let NumMicroOps = 6;
+}
+
+def Ampere1Write_9cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+                                              Ampere1UnitL, Ampere1UnitL,
+                                              Ampere1UnitXY, Ampere1UnitXY,
+                                              Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 9;
+  let NumMicroOps = 8;
+}
+
+def Ampere1Write_9cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_9cyc_2L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+                                              Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 9;
+  let NumMicroOps = 5;
+}
+
+def Ampere1Write_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+                                                 Ampere1UnitXY, Ampere1UnitXY,
+                                                 Ampere1UnitXY, Ampere1UnitXY,
+                                                 Ampere1UnitS, Ampere1UnitS,
+                                                 Ampere1UnitS, Ampere1UnitS,
+                                                 Ampere1UnitZ, Ampere1UnitZ,
+                                                 Ampere1UnitZ, Ampere1UnitZ]> {
+  let Latency = 9;
+  let NumMicroOps = 14;
+}
+
+def Ampere1Write_9cyc_8XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+                                                 Ampere1UnitXY, Ampere1UnitXY,
+                                                 Ampere1UnitXY, Ampere1UnitXY,
+                                                 Ampere1UnitXY, Ampere1UnitXY,
+                                                 Ampere1UnitS, Ampere1UnitS,
+                                                 Ampere1UnitS, Ampere1UnitS,
+                                                 Ampere1UnitZ, Ampere1UnitZ,
+                                                 Ampere1UnitZ, Ampere1UnitZ]> {
+  let Latency = 9;
+  let NumMicroOps = 16;
+}
+
+def Ampere1Write_10cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_10cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_10cyc_1X_1Z : SchedWriteRes<[Ampere1UnitX, Ampere1UnitZ]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_10cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
+                                               Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 10;
+  let NumMicroOps = 6;
+}
+
+def Ampere1Write_10cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> {
+  let Latency = 10;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_10cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> {
+  let Latency = 10;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_11cyc_1BS_1L : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitL]> {
+  let Latency = 11;
+  let NumMicroOps = 2;
+}
+
+def Ampere1Write_11cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> {
+  let Latency = 11;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_11cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> {
+  let Latency = 11;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_11cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+                                               Ampere1UnitL, Ampere1UnitL,
+                                               Ampere1UnitXY, Ampere1UnitXY,
+                                               Ampere1UnitXY, Ampere1UnitXY,
+                                               Ampere1UnitXY, Ampere1UnitXY,
+                                               Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 11;
+  let NumMicroOps = 12;
+}
+
+def Ampere1Write_12cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+                                               Ampere1UnitL, Ampere1UnitL,
+                                               Ampere1UnitXY, Ampere1UnitXY,
+                                               Ampere1UnitXY, Ampere1UnitXY,
+                                               Ampere1UnitXY, Ampere1UnitXY,
+                                               Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 12;
+  let NumMicroOps = 12;
+}
+
+def Ampere1Write_12cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 12;
+  let NumMicroOps = 3;
+}
+
+def Ampere1Write_12cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+                                            Ampere1UnitXY, Ampere1UnitXY]> {
+  let Latency = 12;
+  let NumMicroOps = 4;
+}
+
+def Ampere1Write_18cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
+  let Latency = 18;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_19cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+  let Latency = 19;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_25cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+  let Latency = 25;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_32cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+  let Latency = 32;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_34cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
+  let Latency = 34;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_34cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+  let Latency = 34;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_39cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+  let Latency = 39;
+  let NumMicroOps = 1;
+}
+
+def Ampere1Write_62cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+  let Latency = 62;
+  let NumMicroOps = 1;
+}
+
+// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4),
+// which are a single uop, and for extended registers, which have full flexibility
+// across Unit A or B for both uops.
+def Ampere1Write_Arith : SchedWriteVariant<[
+                                SchedVar<RegExtendedPred, [Ampere1Write_2cyc_2AB]>,
+                                SchedVar<AmpereCheapLSL,  [Ampere1Write_1cyc_1AB]>,
+                                SchedVar<NoSchedPred,     [Ampere1Write_2cyc_1B_1AB]>]>;
+
+def Ampere1Write_ArithFlagsetting : SchedWriteVariant<[
+                                SchedVar<RegExtendedPred, [Ampere1Write_2cyc_1AB_1A]>,
+                                SchedVar<AmpereCheapLSL,  [Ampere1Write_1cyc_1A]>,
+                                SchedVar<NoSchedPred,     [Ampere1Write_2cyc_1B_1A]>]>;
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latencies for Ampere-1.
+// This provides a coarse model, which is then specialised below.
+
+def : WriteRes<WriteImm,   [Ampere1UnitAB]>;  // MOVN, MOVZ
+def : WriteRes<WriteI,     [Ampere1UnitAB]>;  // ALU
+def : WriteRes<WriteISReg, [Ampere1UnitB, Ampere1UnitA]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}  // ALU of Shifted-Reg
+def : WriteRes<WriteIEReg, [Ampere1UnitAB, Ampere1UnitA]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}  // ALU of Extended-Reg
+def : WriteRes<WriteExtr,  [Ampere1UnitB]>;  // EXTR shifts a reg pair
+def : WriteRes<WriteIS,    [Ampere1UnitB]>;  // Shift/Scale
+def : WriteRes<WriteID32,  [Ampere1UnitBS]> {
+  let Latency = 18;
+}  // 32-bit Divide
+def : WriteRes<WriteID64,  [Ampere1UnitBS]> {
+  let Latency = 34;
+}  // 64-bit Divide
+def : WriteRes<WriteIM32,  [Ampere1UnitBS]> {
+  let Latency = 3;
+}  // 32-bit Multiply
+def : WriteRes<WriteIM64,  [Ampere1UnitBS]> {
+  let Latency = 3;
+}  // 32-bit Multiply
+def : WriteRes<WriteBr,    [Ampere1UnitA]>;
+def : WriteRes<WriteBrReg, [Ampere1UnitA, Ampere1UnitA]>;
+def : WriteRes<WriteLD,    [Ampere1UnitL]> {
+  let Latency = 4;
+}  // Load from base addr plus immediate offset
+def : WriteRes<WriteST,    [Ampere1UnitS]> {
+  let Latency = 1;
+}  // Store to base addr plus immediate offset
+def : WriteRes<WriteSTP,   [Ampere1UnitS, Ampere1UnitS]> {
+  let Latency = 1;
+  let NumMicroOps = 2;
+}  // Store a register pair.
+def : WriteRes<WriteAdr,   [Ampere1UnitAB]>;
+def : WriteRes<WriteLDIdx, [Ampere1UnitAB, Ampere1UnitS]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+}  // Load from a register index (maybe scaled).
+def : WriteRes<WriteSTIdx, [Ampere1UnitS, Ampere1UnitS]> {
+  let Latency = 1;
+  let NumMicroOps = 2;
+}  // Store to a register index (maybe scaled).
+def : WriteRes<WriteF,  [Ampere1UnitXY]> {
+  let Latency = 2;
+}  // General floating-point ops.
+def : WriteRes<WriteFCmp,  [Ampere1UnitX]> {
+  let Latency = 5;
+}  // Floating-point compare.
+def : WriteRes<WriteFCvt,  [Ampere1UnitXY]> {
+  let Latency = 6;
+}  // Float conversion.
+def : WriteRes<WriteFCopy, [Ampere1UnitXY]> {
+}  // Float-int register copy.
+def : WriteRes<WriteFImm,  [Ampere1UnitXY]> {
+  let Latency = 2;
+}  // Float-int register copy.
+def : WriteRes<WriteFMul,  [Ampere1UnitXY]> {
+  let Latency = 5;
+}  // Floating-point multiply.
+def : WriteRes<WriteFDiv,  [Ampere1UnitXY]> {
+  let Latency = 34;
+}  // Floating-point division.
+def : WriteRes<WriteVd,    [Ampere1UnitXY]> {
+  let Latency = 3;
+}  // 64bit Vector D ops.
+def : WriteRes<WriteVq,    [Ampere1UnitXY]> {
+  let Latency = 3;
+}  // 128bit Vector Q ops.
+def : WriteRes<WriteVLD,   [Ampere1UnitL, Ampere1UnitL]> {
+  let Latency = 5;
+}  // Vector loads.
+def : WriteRes<WriteVST,   [Ampere1UnitS, Ampere1UnitZ]> {
+  let Latency = 2;
+}  // Vector stores.
+
+def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
+
+def : WriteRes<WriteSys,     []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint,    []> { let Latency = 1; }
+
+def : WriteRes<WriteLDHi,    []> {
+  let Latency = 4;
+}  // The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP
+
+// Forwarding logic.
+def : ReadAdvance<ReadI,       0>;
+def : ReadAdvance<ReadISReg,   0>;
+def : ReadAdvance<ReadIEReg,   0>;
+def : ReadAdvance<ReadIM,      0>;
+def : ReadAdvance<ReadIMA,     1, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID,      0>;
+def : ReadAdvance<ReadExtrHi,  0>;
+def : ReadAdvance<ReadST,      0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD,     0>;
+
+//===----------------------------------------------------------------------===//
+// Specialising the scheduling model further for Ampere-1.
+
+def : InstRW<[Ampere1Write_1cyc_1AB], (instrs COPY)>;
+
+// Branch instructions
+def : InstRW<[Ampere1Write_1cyc_1A], (instrs Bcc, BL, RET)>;
+def : InstRW<[Ampere1Write_1cyc_1A],
+        (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>;
+def : InstRW<[Ampere1Write_1cyc_2A], (instrs BLR)>;
+
+// Cryptography instructions
+// -- AES encryption/decryption
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AES[DE]")>;
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AESI?MC")>;
+// -- Polynomial multiplication
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^PMUL", "^PMULL")>;
+// -- SHA-256 hash
+def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA256(H|H2)")>;
+// -- SHA-256 schedule update
+def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA256SU[01]")>;
+// -- SHA-3 instructions
+def : InstRW<[Ampere1Write_2cyc_1XY],
+        (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>;
+// -- SHA-512 hash
+def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA512(H|H2)")>;
+// -- SHA-512 schedule update
+def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA512SU[01]")>;
+// -- SHA1 choose/majority/parity
+def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA1[CMP]")>;
+// -- SHA1 hash/schedule update
+def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1SU[01]")>;
+def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1H")>;
+
+// FP and vector load instructions
+// -- Load 1-element structure to one/all lanes
+// ---- all lanes
+def : InstRW<[Ampere1Write_7cyc_1L_1XY],
+        (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>;
+// ---- one lane
+def : InstRW<[Ampere1Write_7cyc_1L_1XY],
+        (instregex "^LD1i(8|16|32|64)")>;
+// -- Load 1-element structure to one/all lanes, 1D size
+def : InstRW<[Ampere1Write_5cyc_1L],
+        (instregex "^LD1Rv1d")>;
+// -- Load 1-element structures to 1 register
+def : InstRW<[Ampere1Write_5cyc_1L],
+        (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Load 1-element structures to 2 registers
+def : InstRW<[Ampere1Write_5cyc_2L],
+        (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Load 1-element structures to 3 registers
+def : InstRW<[Ampere1Write_6cyc_3L],
+        (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Load 1-element structures to 4 registers
+def : InstRW<[Ampere1Write_6cyc_4L],
+        (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Load 2-element structure to all lanes of 2 registers, 1D size
+def : InstRW<[Ampere1Write_5cyc_2L],
+        (instregex "^LD2Rv1d")>;
+// -- Load 2-element structure to all lanes of 2 registers, other sizes
+def : InstRW<[Ampere1Write_7cyc_2L_2XY],
+        (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>;
+// -- Load 2-element structure to one lane of 2 registers
+def : InstRW<[Ampere1Write_7cyc_2L_2XY],
+        (instregex "^LD2i(8|16|32|64)")>;
+// -- Load 2-element structures to 2 registers, 16B/8H/4S/2D size
+def : InstRW<[Ampere1Write_7cyc_2L_2XY],
+        (instregex "^LD2Twov(16b|8h|4s|2d)")>;
+// -- Load 2-element structures to 2 registers, 8B/4H/2S size
+def : InstRW<[Ampere1Write_9cyc_2L_3XY],
+        (instregex "^LD2Twov(8b|4h|2s)")>;
+// -- Load 3-element structure to all lanes of 3 registers, 1D size
+def : InstRW<[Ampere1Write_6cyc_3L],
+        (instregex "^LD3Rv1d")>;
+// -- Load 3-element structure to all lanes of 3 registers, other sizes
+def : InstRW<[Ampere1Write_8cyc_3L_3XY],
+        (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>;
+// -- Load 3-element structure to one lane of 3 registers
+def : InstRW<[Ampere1Write_8cyc_3L_3XY],
+        (instregex "^LD3i(8|16|32|64)")>;
+// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes
+def : InstRW<[Ampere1Write_9cyc_3L_3XY],
+        (instregex "^LD3Threev(16b|8h|4s)")>;
+// -- Load 3-element structures to 3 registers, 2D size
+def : InstRW<[Ampere1Write_8cyc_3L_3XY],
+        (instregex "^LD3Threev2d")>;
+// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes
+def : InstRW<[Ampere1Write_10cyc_3L_3XY],
+        (instregex "^LD3Threev(8b|4h|2s)")>;
+// -- Load 4-element structure to all lanes of 4 registers, 1D size
+def : InstRW<[Ampere1Write_6cyc_4L],
+        (instregex "^LD4Rv1d")>;
+// -- Load 4-element structure to all lanes of 4 registers, other sizes
+def : InstRW<[Ampere1Write_8cyc_4L_4XY],
+        (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>;
+// -- Load 4-element structure to one lane of 4 registers
+def : InstRW<[Ampere1Write_6cyc_4L],
+        (instregex "^LD4i(8|16|32|64)")>;
+// -- Load 4-element structures to 4 registers, 2D size
+def : InstRW<[Ampere1Write_9cyc_4L_4XY],
+        (instregex "^LD4Fourv2d")>;
+// -- Load 4-element structures to 4 registers, 2S size
+def : InstRW<[Ampere1Write_12cyc_4L_8XY],
+        (instregex "^LD4Fourv2s")>;
+// -- Load 4-element structures to 4 registers, other sizes
+def : InstRW<[Ampere1Write_11cyc_4L_8XY],
+        (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>;
+// -- Load pair, Q-form
+def : InstRW<[Ampere1Write_5cyc_2L], (instregex "LDN?PQ")>;
+// -- Load pair, S/D-form
+def : InstRW<[Ampere1Write_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>;
+// -- Load register
+def : InstRW<[Ampere1Write_5cyc_1L], (instregex "LDU?R[BHSDQ]i")>;
+// -- Load register, sign-extended register
+def : InstRW<[Ampere1Write_6cyc_1AB_1L], (instregex "LDR[BHSDQ]ro(W|X)")>;
+
+// FP and vector store instructions
+// -- Store 1-element structure from one lane of 1 register
+def : InstRW<[Ampere1Write_4cyc_1XY_1S_1Z],
+        (instregex "^ST1i(8|16|32|64)")>;
+// -- Store 1-element structures from 1 register
+def : InstRW<[Ampere1Write_2cyc_1S_1Z],
+        (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Store 1-element structures from 2 registers
+def : InstRW<[Ampere1Write_3cyc_2S_2Z],
+        (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Store 1-element structures from 3 registers
+def : InstRW<[Ampere1Write_4cyc_3S_3Z],
+        (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Store 1-element structures from 4 registers
+def : InstRW<[Ampere1Write_5cyc_4S_4Z],
+        (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Store 2-element structure from one lane of 2 registers
+def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
+        (instregex "^ST2i(8|16|32|64)")>;
+// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes
+def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
+        (instregex "^ST2Twov(16b|8h|4s|2d)")>;
+// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes
+def : InstRW<[Ampere1Write_6cyc_2XY_2S_2Z],
+        (instregex "^ST2Twov(8b|4h|2s)")>;
+// -- Store 3-element structure from one lane of 3 registers
+def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
+        (instregex "^ST3i(8|16|32|64)")>;
+// -- Store 3-element structures from 3 registers
+def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
+        (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Store 4-element structure from one lane of 4 registers
+def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z],
+        (instregex "^ST4i(8|16|32|64)")>;
+// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes
+def : InstRW<[Ampere1Write_9cyc_8XY_4S_4Z],
+        (instregex "^ST4Fourv(16b|8h|4s)")>;
+// -- Store 4-element structures from 4 registers, 2D sizes
+def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z],
+        (instregex "^ST4Fourv2d")>;
+// -- Store 4-element structures from 4 registers, 8B/4H/2S sizes
+def : InstRW<[Ampere1Write_9cyc_6XY_4S_4Z],
+        (instregex "^ST4Fourv(8b|4h|2s)")>;
+// -- Store pair, Q-form
+def : InstRW<[Ampere1Write_3cyc_2S_2Z], (instregex "^STN?PQ")>;
+// -- Store pair, S/D-form
+def : InstRW<[Ampere1Write_3cyc_1S_2Z],	(instregex "^STN?P[SD]")>;
+// -- Store register
+def : InstRW<[Ampere1Write_2cyc_1S_1Z],	(instregex "^STU?R[BHSDQ](ui|i)")>;
+// -- Store register, sign-extended register offset
+def : InstRW<[Ampere1Write_2cyc_1AB_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>;
+
+// FP data processing, bfloat16 format
+def : InstRW<[Ampere1Write_5cyc_1XY], (instrs BFCVT)>;
+def : InstRW<[Ampere1Write_7cyc_2XY], (instrs BFCVTN, BFCVTN2)>;
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>;
+def : InstRW<[Ampere1Write_4cyc_2XY], (instrs BFMMLA)>;
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^BFMLAL")>;
+
+// FP data processing, scalar/vector, half precision
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY],
+        (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY],
+        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY],
+        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>;
+def : InstRW<[Ampere1Write_4cyc_1X],
+        (instregex "^FCMPE?H")>;
+def : InstRW<[Ampere1Write_10cyc_1A_1BS_1X],
+        (instregex "^FCCMPE?H")>;
+def : InstRW<[Ampere1Write_10cyc_1A_1BS_1XY],
+        (instregex "^FCSELH")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^[SU]CVTFv.[fi]16")>;
+def : InstRW<[Ampere1Write_25cyc_1XY], (instregex "^FDIVv.[if]16", "FDIVH")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>;
+def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>;
+def : InstRW<[Ampere1Write_12cyc_3XY], (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FMULX?v.[if]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instrs FMULX16)>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FML[AS]v.[if]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRECPXv.[if]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if]16")>;
+def : InstRW<[Ampere1Write_39cyc_1XY], (instregex "^FSQRTv.f16", "^FSQRTHr")>;
+
+// FP data processing, scalar/vector, single/double precision
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>;
+def : InstRW<[Ampere1Write_5cyc_1XY],
+        (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>;
+def : InstRW<[Ampere1Write_5cyc_1XY],
+        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>;
+def : InstRW<[Ampere1Write_5cyc_1XY],
+        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>;
+def : InstRW<[Ampere1Write_5cyc_1X],
+        (instregex "^FCMPE?(S|D)")>;
+def : InstRW<[Ampere1Write_11cyc_1A_1BS_1X],
+        (instregex "^FCCMPE?(S|D)")>;
+def : InstRW<[Ampere1Write_11cyc_1A_1BS_1XY],
+        (instregex "^FCSEL(S|D)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^[SU]CVTFv.[fi](32|64)")>;
+def : InstRW<[Ampere1Write_34cyc_1XY], (instregex "^FDIVv.[if](64)", "FDIVD")>;
+def : InstRW<[Ampere1Write_19cyc_1XY], (instregex "^FDIVv.[if](32)", "FDIVS")>;
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>;
+def : InstRW<[Ampere1Write_10cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FMULX?v.[if](32|64)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instrs FMULX32, FMULX64)>;
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FN?M(ADD|SUB)[SD]rrr")>;
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FML[AS]v.[if](32|64)")>;
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPXv.[if](32|64)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^F(RECP|RSQRT)S(32|64)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FRINT(32|64)")>;
+def : InstRW<[Ampere1Write_62cyc_1XY], (instregex "^FSQRTv.f64", "^FSQRTDr")>;
+def : InstRW<[Ampere1Write_32cyc_1XY], (instregex "^FSQRTv.f32", "^FSQRTSr")>;
+
+// FP miscellaneous instructions
+def : InstRW<[Ampere1Write_10cyc_1XY_1Z], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FCVT[HSD]Hr")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[HSD][SD]r")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVTLv")>;
+def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^FCVT(N|XN)v")>;
+def : InstRW<[Ampere1Write_10cyc_1X_1Z], (instrs FJCVTZS)>;
+def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>;
+def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>;
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>;
+def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "^FMOVXDHighr")>;
+def : InstRW<[Ampere1Write_4cyc_1Z], (instregex "^FMOV[WX][HSD]r")>;
+
+// Integer arithmetic and logical instructions
+def : InstRW<[Ampere1Write_1cyc_1A],
+        (instregex "ADC(W|X)r", "SBC(W|X)r")>;
+def : InstRW<[Ampere1Write_Arith],
+        (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r")>;
+def : InstRW<[Ampere1Write_ArithFlagsetting],
+        (instregex "(ADD|AND|BIC|SUB)S(W|X)r")>;
+def : InstRW<[Ampere1Write_1cyc_1A],
+        (instregex "(ADC|SBC)S(W|X)r")>;
+def : InstRW<[Ampere1Write_1cyc_1A], (instrs RMIF)>;
+def : InstRW<[Ampere1Write_1cyc_1A],
+        (instregex "(CCMN|CCMP)(X|W)")>;
+def : InstRW<[Ampere1Write_1cyc_1A],
+        (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>;
+def : InstRW<[Ampere1Write_18cyc_1BS], (instrs SDIVWr, UDIVWr)>;
+def : InstRW<[Ampere1Write_34cyc_1BS], (instrs SDIVXr, UDIVXr)>;
+def : InstRW<[Ampere1Write_3cyc_1BS],
+        (instregex "(S|U)MULHr")>;
+def : InstRW<[Ampere1Write_4cyc_1BS],
+        (instregex "(S|U)?M(ADD|SUB)L?r")>;
+
+// Integer load instructions
+def : InstRW<[Ampere1Write_4cyc_2L],
+        (instregex "(LDNP|LDP|LDPSW)(X|W)")>;
+def : InstRW<[Ampere1Write_4cyc_1L],
+        (instregex "LDR(B|D|H|Q|S)ui")>;
+def : InstRW<[Ampere1Write_4cyc_1L],
+        (instregex "LDR(D|Q|W|X)l")>;
+def : InstRW<[Ampere1Write_4cyc_1L],
+        (instregex "LDTR(B|H|W|X)i")>;
+def : InstRW<[Ampere1Write_4cyc_1L],
+        (instregex "LDTRS(BW|BX|HW|HX|W)i")>;
+def : InstRW<[Ampere1Write_4cyc_1L],
+        (instregex "LDUR(BB|HH|X|W)i")>;
+def : InstRW<[Ampere1Write_4cyc_1L],
+        (instregex "LDURS(BW|BX|HW|HX|W)i")>;
+def : InstRW<[Ampere1Write_5cyc_1AB_1L],
+        (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>;
+def : InstRW<[Ampere1Write_1cyc_1L],
+        (instrs PRFMl, PRFUMi, PRFUMi)>;
+def : InstRW<[Ampere1Write_2cyc_1AB_1L],
+        (instrs PRFMroW, PRFMroX)>;
+
+// Integer miscellaneous instructions
+def : InstRW<[Ampere1Write_1cyc_1A],  (instrs ADR, ADRP)>;
+def : InstRW<[Ampere1Write_1cyc_1B],  (instregex "EXTR(W|X)")>;
+def : InstRW<[Ampere1Write_1cyc_1B],  (instregex "(S|U)?BFM(W|X)")>;
+def : InstRW<[Ampere1Write_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>;
+def : InstRW<[Ampere1Write_1cyc_1B],  (instregex "CLS(W|X)")>;
+def : InstRW<[Ampere1Write_1cyc_1A],  (instrs SETF8, SETF16)>;
+def : InstRW<[Ampere1Write_1cyc_1AB],
+        (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
+def : InstRW<[Ampere1Write_1cyc_1B],
+        (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>;
+def : InstRW<[Ampere1Write_1cyc_1B],
+        (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>;
+
+// Integer store instructions
+def : InstRW<[Ampere1Write_1cyc_2S],  (instregex "STNP(X|W)i")>;
+def : InstRW<[Ampere1Write_2cyc_1B_1S],
+        (instrs STPWi, STPXi)>;
+def : InstRW<[Ampere1Write_3cyc_1B_1S_1AB],
+        (instregex "STP(W|X)(pre|post)")>;
+def : InstRW<[Ampere1Write_1cyc_1S],
+        (instrs STTRBi, STTRHi, STTRWi, STTRXi)>;
+def : InstRW<[Ampere1Write_1cyc_1S],
+        (instregex "STUR(BB|HH|X|W)i",
+                   "STR(X|W)ui",
+                   "STUR(BB|HH|X|W)i")>;
+def : InstRW<[Ampere1Write_1cyc_2S], (instrs STRWroX, STRXroX)>;
+def : InstRW<[Ampere1Write_2cyc_1AB_2S], (instrs STRWroW, STRXroW)>;
+
+// Pointer authentication
+//def : InstRW<[Ampere1Write_7cyc_1BS],
+//	(instrs AUTIAZ, AUTIBZ, AUTIASP, AUTIBSP, AUTIA1716, AUTIB1716)>;
+def : InstRW<[Ampere1Write_8cyc_1BS_1A],
+        (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>;
+def : InstRW<[Ampere1Write_8cyc_1BS_2A],
+        (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>;
+//def : InstRW<[Ampere1Write_7cyc_1BS],
+//	(instrs PACIAZ, PACIBZ, PACIASP, PACIBSP, PACIA1716, PACIB1716)>;
+def : InstRW<[Ampere1Write_11cyc_1BS_1L], (instregex "^LDRA(A|B)")>;
+def : InstRW<[Ampere1Write_7cyc_1BS], (instrs XPACD, XPACI)>;
+
+// Vector integer instructions
+// -- absolute difference
+def : InstRW<[Ampere1Write_3cyc_1XY],
+             (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv",
+                        "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>;
+// -- arithmetic
+def : InstRW<[Ampere1Write_3cyc_1XY],
+        (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD",
+                   "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW",
+                   "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>;
+// -- arithmetic, horizontal, 16B
+def : InstRW<[Ampere1Write_12cyc_4XY],
+            (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>;
+def : InstRW<[Ampere1Write_12cyc_4XY],
+            (instregex "^[SU](MIN|MAX)Vv16i8v")>;
+// -- arithmetic, horizontal, 4H/4S
+def : InstRW<[Ampere1Write_6cyc_2XY],
+            (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>;
+def : InstRW<[Ampere1Write_6cyc_2XY],
+            (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>;
+// -- arithmetic, horizontal, 8B/8H
+def : InstRW<[Ampere1Write_9cyc_3XY],
+            (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>;
+def : InstRW<[Ampere1Write_9cyc_3XY],
+            (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>;
+// -- arithmetic, narrowing
+def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(ADD|SUB)HNv.*")>;
+def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(RADD|RSUB)HNv.*")>;
+// -- arithmetic, pairwise
+def : InstRW<[Ampere1Write_3cyc_1XY],
+        (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>;
+// -- arithmetic, saturating
+def : InstRW<[Ampere1Write_3cyc_1XY],
+        (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>;
+// -- bit count
+def : InstRW<[Ampere1Write_2cyc_1XY],
+        (instregex "^(CLS|CLZ|CNT)v")>;
+// -- compare
+def : InstRW<[Ampere1Write_3cyc_1XY],
+        (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv",
+                   "^CMHIv", "^CMHSv")>;
+// -- compare non-zero
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^CMTSTv")>;
+// -- dot product
+def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^(S|SU|U|US)DOTv")>;
+// -- fp reciprocal estimate
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPEv", "^FRSQRTEv")>;
+// -- integer reciprocal estimate
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^URECPEv", "^URSQRTEv")>;
+// -- logical
+def : InstRW<[Ampere1Write_2cyc_1XY],
+        (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
+// -- logical, narrowing
+def : InstRW<[Ampere1Write_5cyc_2XY],
+        (instregex "RSHRNv",
+                   "SHRNv", "SQSHRNv", "SQSHRUNv",
+                   "UQXTNv")>;
+// -- matrix multiply
+def : InstRW<[Ampere1Write_6cyc_2XY],
+        (instrs SMMLA, UMMLA, USMMLA)>;
+// -- max/min
+def : InstRW<[Ampere1Write_3cyc_1XY],
+        (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
+def : InstRW<[Ampere1Write_3cyc_1XY],
+        (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
+// -- move immediate
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^MOVIv", "^MVNIv")>;
+// -- multiply
+def : InstRW<[Ampere1Write_3cyc_1XY],
+        (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>;
+// -- multiply accumulate
+def : InstRW<[Ampere1Write_3cyc_1XY],
+        (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>;
+// -- negation, saturating
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^SQABS", "^SQNEG")>;
+// -- reverse bits/bytes
+def : InstRW<[Ampere1Write_2cyc_1XY],
+        (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>;
+// -- shift
+def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+// -- shift and accumulate
+def : InstRW<[Ampere1Write_3cyc_1XY],
+        (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>;
+// -- shift, saturating
+def : InstRW<[Ampere1Write_3cyc_1XY],
+        (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU",
+                   "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL",
+                   "^UQSHL")>;
+
+// Vector miscellaneous instructions
+// -- duplicate element
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^DUPv.+lane")>;
+// -- duplicate from GPR
+def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^DUPv.+gpr")>;
+// -- extract narrow
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^XTNv")>;
+// -- insert/extract element
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^EXTv", "^INSv.+lane")>;
+// -- move FP immediate
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOVv")>;
+// -- move element to GPR
+def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "(S|U)MOVv")>;
+// -- move from GPR to any element
+def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^INSv.+gpr")>;
+// -- table lookup
+def : InstRW<[Ampere1Write_2cyc_1XY],
+            (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>;
+def : InstRW<[Ampere1Write_4cyc_2XY],
+            (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>;
+def : InstRW<[Ampere1Write_6cyc_3XY],
+            (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>;
+def : InstRW<[Ampere1Write_8cyc_4XY],
+            (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>;
+// -- transpose
+def : InstRW<[Ampere1Write_2cyc_1XY],
+              (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;
+// -- zip/unzip
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^ZIP1v", "^ZIP2v")>;
+
+} // SchedModel = Ampere1Model
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td b/llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td
new file mode 100644
index 0000000..8552c07
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td
@@ -0,0 +1,25 @@
+//===- AArch64SchedPredAmpere.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines scheduling predicate definitions that are used by the
+// AArch64 Ampere Computing processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Auxiliary predicates.
+
+// Check for a LSL shift <= 4
+def AmpereCheapLSL : MCSchedPredicate<
+                                CheckAny<[CheckShiftBy0,
+                                 CheckAll<
+                                   [CheckShiftLSL,
+                                    CheckAny<
+                                      [CheckShiftBy1,
+                                       CheckShiftBy2,
+                                       CheckShiftBy3,
+                                       CheckShiftBy4]>]>]>>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredicates.td b/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
index 5402b8b..4473f3a 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
@@ -53,7 +53,7 @@ let FunctionMapper = "AArch64_AM::getShiftType" in {
 }
 
 // Check for shifting in arithmetic and logic instructions.
-foreach I = {0-3, 8} in {
+foreach I = {0-4, 8} in {
   let FunctionMapper = "AArch64_AM::getShiftValue" in
   def CheckShiftBy#I        : CheckImmOperand<3, I>;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index b3eb65d..f9b7ca8 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -238,6 +238,12 @@ void AArch64Subtarget::initializeProperties() {
     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     MinVectorRegisterBitWidth = 128;
     break;
+  case Ampere1:
+    CacheLineSize = 64;
+    PrefFunctionLogAlignment = 6;
+    PrefLoopLogAlignment = 6;
+    MaxInterleaveFactor = 4;
+    break;
   }
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index e919263..d7878c4 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -40,6 +40,7 @@ public:
   enum ARMProcFamilyEnum : uint8_t {
     Others,
     A64FX,
+    Ampere1,
     AppleA7,
     AppleA10,
     AppleA11,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a184159..135b94d 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -371,6 +371,49 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
         return Entry->Cost;
     break;
   }
+  case Intrinsic::fptosi_sat:
+  case Intrinsic::fptoui_sat: {
+    if (ICA.getArgTypes().empty())
+      break;
+    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
+    auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]);
+    EVT MTy = TLI->getValueType(DL, RetTy);
+    // Check for the legal types, which are where the size of the input and the
+    // output are the same, or we are using cvt f64->i32 or f32->i64.
+    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
+         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
+         LT.second == MVT::v2f64) &&
+        (LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits() ||
+         (LT.second == MVT::f64 && MTy == MVT::i32) ||
+         (LT.second == MVT::f32 && MTy == MVT::i64)))
+      return LT.first;
+    // Similarly for fp16 sizes
+    if (ST->hasFullFP16() &&
+        ((LT.second == MVT::f16 && MTy == MVT::i32) ||
+         ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
+          (LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits()))))
+      return LT.first;
+
+    // Otherwise we use a legal convert followed by a min+max
+    if ((LT.second.getScalarType() == MVT::f32 ||
+         LT.second.getScalarType() == MVT::f64 ||
+         (ST->hasFullFP16() && LT.second.getScalarType() == MVT::f16)) &&
+        LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) {
+      Type *LegalTy =
+          Type::getIntNTy(RetTy->getContext(), LT.second.getScalarSizeInBits());
+      if (LT.second.isVector())
+        LegalTy = VectorType::get(LegalTy, LT.second.getVectorElementCount());
+      InstructionCost Cost = 1;
+      IntrinsicCostAttributes Attrs1(IsSigned ? Intrinsic::smin : Intrinsic::umin,
+                                    LegalTy, {LegalTy, LegalTy});
+      Cost += getIntrinsicInstrCost(Attrs1, CostKind);
+      IntrinsicCostAttributes Attrs2(IsSigned ? Intrinsic::smax : Intrinsic::umax,
+                                    LegalTy, {LegalTy, LegalTy});
+      Cost += getIntrinsicInstrCost(Attrs2, CostKind);
+      return LT.first * Cost;
+    }
+    break;
+  }
   default:
     break;
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index 805b6c7..bfe2e9b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -126,7 +126,6 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
          LLT::scalar(64));
   const LLT S32 = LLT::scalar(32);
 
-  B.setMBB(*MI.getParent());
   B.setInstrAndDebugLoc(MI);
 
   auto Unmerge = B.buildUnmerge(S32, Src);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 76c8edc..a8310c2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -737,7 +737,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::BUILD_VECTOR, Vec16, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec16, Custom);
       setOperationAction(ISD::INSERT_VECTOR_ELT, Vec16, Expand);
-      setOperationAction(ISD::SCALAR_TO_VECTOR, Vec16, Expand);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, Vec16, Custom);
     }
   }
 
@@ -4772,6 +4772,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
   case ISD::VECTOR_SHUFFLE:
     return lowerVECTOR_SHUFFLE(Op, DAG);
+  case ISD::SCALAR_TO_VECTOR:
+    return lowerSCALAR_TO_VECTOR(Op, DAG);
   case ISD::BUILD_VECTOR:
     return lowerBUILD_VECTOR(Op, DAG);
   case ISD::FP_ROUND:
@@ -5768,14 +5770,11 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
   EVT EltVT = VecVT.getVectorElementType();
   unsigned VecSize = VecVT.getSizeInBits();
   unsigned EltSize = EltVT.getSizeInBits();
+  SDLoc SL(Op);
 
-
-  assert(VecSize <= 64);
-
+  // Specially handle the case of v4i16 with static indexing.
   unsigned NumElts = VecVT.getVectorNumElements();
-  SDLoc SL(Op);
   auto KIdx = dyn_cast<ConstantSDNode>(Idx);
-
   if (NumElts == 4 && EltSize == 16 && KIdx) {
     SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Vec);
 
@@ -5803,35 +5802,41 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
     return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat);
   }
 
+  // Static indexing does not lower to stack access, and hence there is no need
+  // for special custom lowering to avoid stack access.
   if (isa<ConstantSDNode>(Idx))
     return SDValue();
 
-  MVT IntVT = MVT::getIntegerVT(VecSize);
-
-  // Avoid stack access for dynamic indexing.
+  // Avoid stack access for dynamic indexing by custom lowering to
   // v_bfi_b32 (v_bfm_b32 16, (shl idx, 16)), val, vec
 
-  // Create a congruent vector with the target value in each element so that
-  // the required element can be masked and ORed into the target vector.
-  SDValue ExtVal = DAG.getNode(ISD::BITCAST, SL, IntVT,
-                               DAG.getSplatBuildVector(VecVT, SL, InsVal));
+  assert(VecSize <= 64 && "Expected target vector size to be <= 64 bits");
 
+  MVT IntVT = MVT::getIntegerVT(VecSize);
+
+  // Convert vector index to bit-index and get the required bit mask.
   assert(isPowerOf2_32(EltSize));
   SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
-
-  // Convert vector index to bit-index.
   SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx, ScaleFactor);
-
-  SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec);
   SDValue BFM = DAG.getNode(ISD::SHL, SL, IntVT,
                             DAG.getConstant(0xffff, SL, IntVT),
                             ScaledIdx);
 
+  // 1. Create a congruent vector with the target value in each element.
+  SDValue ExtVal = DAG.getNode(ISD::BITCAST, SL, IntVT,
+                               DAG.getSplatBuildVector(VecVT, SL, InsVal));
+
+  // 2. Mask off all other indicies except the required index within (1).
   SDValue LHS = DAG.getNode(ISD::AND, SL, IntVT, BFM, ExtVal);
+
+  // 3. Mask off the required index within the target vector.
+  SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec);
   SDValue RHS = DAG.getNode(ISD::AND, SL, IntVT,
                             DAG.getNOT(SL, BFM, IntVT), BCVec);
 
+  // 4. Get (2) and (3) ORed into the target vector.
   SDValue BFI = DAG.getNode(ISD::OR, SL, IntVT, LHS, RHS);
+
   return DAG.getNode(ISD::BITCAST, SL, VecVT, BFI);
 }
 
@@ -5954,6 +5959,22 @@ SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
   return DAG.getNode(ISD::CONCAT_VECTORS, SL, ResultVT, Pieces);
 }
 
+SDValue SITargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDValue SVal = Op.getOperand(0);
+  EVT ResultVT = Op.getValueType();
+  EVT SValVT = SVal.getValueType();
+  SDValue UndefVal = DAG.getUNDEF(SValVT);
+  SDLoc SL(Op);
+
+  SmallVector<SDValue, 8> VElts;
+  VElts.push_back(SVal);
+  for (int I = 1, E = ResultVT.getVectorNumElements(); I < E; ++I)
+    VElts.push_back(UndefVal);
+
+  return DAG.getBuildVector(ResultVT, SL, VElts);
+}
+
 SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
   SDLoc SL(Op);
@@ -10661,39 +10682,64 @@ static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL,
   return DAG.getNode(ISD::TRUNCATE, SL, VT, Mad);
 }
 
-SDValue SITargetLowering::performAddCombine(SDNode *N,
+// Fold (add (mul x, y), z) --> (mad_[iu]64_[iu]32 x, y, z).
+SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
+  assert(N->getOpcode() == ISD::ADD);
+
   SelectionDAG &DAG = DCI.DAG;
   EVT VT = N->getValueType(0);
   SDLoc SL(N);
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
 
-  if ((LHS.getOpcode() == ISD::MUL || RHS.getOpcode() == ISD::MUL)
-      && Subtarget->hasMad64_32() &&
-      !VT.isVector() && VT.getScalarSizeInBits() > 32 &&
-      VT.getScalarSizeInBits() <= 64) {
-    if (LHS.getOpcode() != ISD::MUL)
-      std::swap(LHS, RHS);
+  if (VT.isVector())
+    return SDValue();
 
-    SDValue MulLHS = LHS.getOperand(0);
-    SDValue MulRHS = LHS.getOperand(1);
-    SDValue AddRHS = RHS;
+  unsigned NumBits = VT.getScalarSizeInBits();
+  if (NumBits <= 32 || NumBits > 64)
+    return SDValue();
 
-    // TODO: Maybe restrict if SGPR inputs.
-    if (numBitsUnsigned(MulLHS, DAG) <= 32 &&
-        numBitsUnsigned(MulRHS, DAG) <= 32) {
-      MulLHS = DAG.getZExtOrTrunc(MulLHS, SL, MVT::i32);
-      MulRHS = DAG.getZExtOrTrunc(MulRHS, SL, MVT::i32);
-      AddRHS = DAG.getZExtOrTrunc(AddRHS, SL, MVT::i64);
-      return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, false);
-    }
+  if (LHS.getOpcode() != ISD::MUL) {
+    assert(RHS.getOpcode() == ISD::MUL);
+    std::swap(LHS, RHS);
+  }
+
+  SDValue MulLHS = LHS.getOperand(0);
+  SDValue MulRHS = LHS.getOperand(1);
+  SDValue AddRHS = RHS;
+
+  // TODO: Maybe restrict if SGPR inputs.
+  if (numBitsUnsigned(MulLHS, DAG) <= 32 &&
+      numBitsUnsigned(MulRHS, DAG) <= 32) {
+    MulLHS = DAG.getZExtOrTrunc(MulLHS, SL, MVT::i32);
+    MulRHS = DAG.getZExtOrTrunc(MulRHS, SL, MVT::i32);
+    AddRHS = DAG.getZExtOrTrunc(AddRHS, SL, MVT::i64);
+    return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, false);
+  }
+
+  if (numBitsSigned(MulLHS, DAG) <= 32 && numBitsSigned(MulRHS, DAG) <= 32) {
+    MulLHS = DAG.getSExtOrTrunc(MulLHS, SL, MVT::i32);
+    MulRHS = DAG.getSExtOrTrunc(MulRHS, SL, MVT::i32);
+    AddRHS = DAG.getSExtOrTrunc(AddRHS, SL, MVT::i64);
+    return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, true);
+  }
+
+  return SDValue();
+}
+
+SDValue SITargetLowering::performAddCombine(SDNode *N,
+                                            DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+  SDLoc SL(N);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
 
-    if (numBitsSigned(MulLHS, DAG) <= 32 && numBitsSigned(MulRHS, DAG) <= 32) {
-      MulLHS = DAG.getSExtOrTrunc(MulLHS, SL, MVT::i32);
-      MulRHS = DAG.getSExtOrTrunc(MulRHS, SL, MVT::i32);
-      AddRHS = DAG.getSExtOrTrunc(AddRHS, SL, MVT::i64);
-      return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, true);
+  if (LHS.getOpcode() == ISD::MUL || RHS.getOpcode() == ISD::MUL) {
+    if (Subtarget->hasMad64_32()) {
+      if (SDValue Folded = tryFoldToMad64_32(N, DCI))
+        return Folded;
     }
 
     return SDValue();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 72a00c1..18bb9fb 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -151,6 +151,7 @@ private:
   SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
@@ -197,6 +198,7 @@ private:
   SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const;
   unsigned getFusedOpcode(const SelectionDAG &DAG,
                           const SDNode *N0, const SDNode *N1) const;
+  SDValue tryFoldToMad64_32(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index c2cc302..b2765b2 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2562,8 +2562,9 @@ void ARMFrameLowering::adjustForSegmentedStacks(
     // Make sure the LiveIns are still sorted and unique.
     MBB->sortUniqueLiveIns();
     // Replace the edges to PrologueMBB by edges to the sequences
-    // we are about to add.
-    MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
+    // we are about to add, but only update for immediate predecessors.
+    if (MBB->isSuccessor(&PrologueMBB))
+      MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
   }
 
   // The required stack size that is aligned to ARM constant criterion.
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index bbd5f5f..44a323d 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -376,7 +376,8 @@ def ProcessorFeatures {
      FeaturePartwordAtomic,
      FeatureQuadwordAtomic,
      FeaturePredictableSelectIsExpensive,
-     FeatureISA2_07
+     FeatureISA2_07,
+     FeatureCRBits
     ];
 
   list<SubtargetFeature> P8SpecificFeatures = [FeatureAddiLoadFusion,
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 9d8290f..5b28f0d 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -32,7 +32,7 @@ class PassRegistry;
 
 bool lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                     AsmPrinter &AP);
-bool LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
+bool lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
                                          MCOperand &MCOp, const AsmPrinter &AP);
 
 FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM);
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 7d1ec2a..5b2a247 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -63,7 +63,7 @@ public:
 
   // Wrapper needed for tblgenned pseudo lowering.
   bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
-    return LowerRISCVMachineOperandToMCOperand(MO, MCOp, *this);
+    return lowerRISCVMachineOperandToMCOperand(MO, MCOp, *this);
   }
 
   void emitStartOfAsmFile(Module &M) override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index de9c151..7fae031 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1121,16 +1121,15 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
 
       // Otherwise use
-      // vmslt{u}.vx vd, va, x, v0.t; if mask policy is agnostic.
+      // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
+      // The result is mask undisturbed.
+      // We use the same instructions to emulate mask agnostic behavior, because
+      // the agnostic result can be either undisturbed or all 1.
       SDValue Cmp = SDValue(
           CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
                                  {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
           0);
-      if (MaskedOff.isUndef()) {
-        ReplaceNode(Node, Cmp.getNode());
-        return;
-      }
-      // Need vmxor.mm vd, vd, v0 to assign inactive value.
+      // vmxor.mm vd, vd, v0 is used to update active value.
       ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
                                                {Cmp, Mask, VL, MaskSEW}));
       return;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4cb3188..ff63b22 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6918,7 +6918,10 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     SDValue Overflow;
     if (IsAdd && isOneConstant(RHS)) {
       // Special case uaddo X, 1 overflowed if the addition result is 0.
-      // FIXME: We can do this for any constant RHS by using (X + C) < C.
+      // The general case (X + C) < C is not necessarily beneficial. Although we
+      // reduce the live range of X, we may introduce the materialization of
+      // constant C, especially when the setcc result is used by branch. We have
+      // no compare with constant and branch instructions.
       Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
                               DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
     } else {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index f7a1998..3831dc5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -114,11 +114,11 @@ defm : FPFMADynFrmAlias_m<FNMSUB_D, "fnmsub.d", DINX>;
 defm : FPFMADynFrmAlias_m<FNMADD_D, "fnmadd.d", DINX>;
 
 let SchedRW = [WriteFALU64, ReadFALU64, ReadFALU64] in {
-defm FADD_D : FPALU_rr_frm_m<0b0000001, "fadd.d", DINX>;
+defm FADD_D : FPALU_rr_frm_m<0b0000001, "fadd.d", DINX, /*Commutable*/1>;
 defm FSUB_D : FPALU_rr_frm_m<0b0000101, "fsub.d", DINX>;
 }
 let SchedRW = [WriteFMul64, ReadFMul64, ReadFMul64] in
-defm FMUL_D : FPALU_rr_frm_m<0b0001001, "fmul.d", DINX>;
+defm FMUL_D : FPALU_rr_frm_m<0b0001001, "fmul.d", DINX, /*Commutable*/1>;
 
 let SchedRW = [WriteFDiv64, ReadFDiv64, ReadFDiv64] in
 defm FDIV_D : FPALU_rr_frm_m<0b0001101, "fdiv.d", DINX>;
@@ -140,8 +140,8 @@ defm FSGNJX_D : FPALU_rr_m<0b0010001, 0b010, "fsgnjx.d", DINX>;
 }
 
 let SchedRW = [WriteFMinMax64, ReadFMinMax64, ReadFMinMax64] in {
-defm FMIN_D   : FPALU_rr_m<0b0010101, 0b000, "fmin.d", DINX>;
-defm FMAX_D   : FPALU_rr_m<0b0010101, 0b001, "fmax.d", DINX>;
+defm FMIN_D   : FPALU_rr_m<0b0010101, 0b000, "fmin.d", DINX, /*Commutable*/1>;
+defm FMAX_D   : FPALU_rr_m<0b0010101, 0b001, "fmax.d", DINX, /*Commutable*/1>;
 }
 
 defm FCVT_S_D : FPUnaryOp_r_frm_m<0b0100000, 0b00001, FDINX, "fcvt.s.d">,
@@ -152,7 +152,7 @@ defm FCVT_D_S : FPUnaryOp_r_m<0b0100001, 0b00000, 0b000, DFINX, "fcvt.d.s">,
                 Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>;
 
 let SchedRW = [WriteFCmp64, ReadFCmp64, ReadFCmp64] in {
-defm FEQ_D : FPCmp_rr_m<0b1010001, 0b010, "feq.d", DINX>;
+defm FEQ_D : FPCmp_rr_m<0b1010001, 0b010, "feq.d", DINX, /*Commutable*/1>;
 defm FLT_D : FPCmp_rr_m<0b1010001, 0b001, "flt.d", DINX>;
 defm FLE_D : FPCmp_rr_m<0b1010001, 0b000, "fle.d", DINX>;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index a2cd4a0..b1077ae 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -187,28 +187,32 @@ multiclass FPFMADynFrmAlias_m<FPFMA_rrr_frm Inst, string OpcodeStr,
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in
 class FPALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
-               DAGOperand rty>
+               DAGOperand rty, bit Commutable>
     : RVInstR<funct7, funct3, OPC_OP_FP, (outs rty:$rd),
-              (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+              (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2"> {
+  let isCommutable = Commutable;
+}
 multiclass FPALU_rr_m<bits<7> funct7, bits<3> funct3, string opcodestr,
-                      list<ExtInfo_r> Exts> {
+                      list<ExtInfo_r> Exts, bit Commutable = 0> {
   foreach Ext = Exts in
     let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
-    def Ext.Suffix : FPALU_rr<funct7, funct3, opcodestr, Ext.Reg>;
+    def Ext.Suffix : FPALU_rr<funct7, funct3, opcodestr, Ext.Reg, Commutable>;
 }
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
     UseNamedOperandTable = 1, hasPostISelHook = 1 in
-class FPALU_rr_frm<bits<7> funct7, string opcodestr, DAGOperand rty>
+class FPALU_rr_frm<bits<7> funct7, string opcodestr, DAGOperand rty,
+                   bit Commutable>
     : RVInstRFrm<funct7, OPC_OP_FP, (outs rty:$rd),
                  (ins rty:$rs1, rty:$rs2, frmarg:$frm), opcodestr,
-                  "$rd, $rs1, $rs2, $frm">;
-
+                  "$rd, $rs1, $rs2, $frm"> {
+  let isCommutable = Commutable;
+}
 multiclass FPALU_rr_frm_m<bits<7> funct7, string opcodestr,
-                          list<ExtInfo_r> Exts> {
+                          list<ExtInfo_r> Exts, bit Commutable = 0> {
   foreach Ext = Exts in
     let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
-    def Ext.Suffix : FPALU_rr_frm<funct7, opcodestr, Ext.Reg>;
+    def Ext.Suffix : FPALU_rr_frm<funct7, opcodestr, Ext.Reg, Commutable>;
 }
 
 class FPALUDynFrmAlias<FPALU_rr_frm Inst, string OpcodeStr,
@@ -269,14 +273,16 @@ multiclass FPUnaryOpDynFrmAlias_m<FPUnaryOp_r_frm Inst, string OpcodeStr,
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in
 class FPCmp_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
-               DAGOperand rty>
+               DAGOperand rty, bit Commutable>
     : RVInstR<funct7, funct3, OPC_OP_FP, (outs GPR:$rd),
-              (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+              (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2"> {
+  let isCommutable = Commutable;
+}
 multiclass FPCmp_rr_m<bits<7> funct7, bits<3> funct3, string opcodestr,
-                      list<ExtInfo_r> Exts> {
+                      list<ExtInfo_r> Exts, bit Commutable = 0> {
   foreach Ext = Exts in
     let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
-    def Ext.Suffix : FPCmp_rr<funct7, funct3, opcodestr, Ext.Reg>;
+    def Ext.Suffix : FPCmp_rr<funct7, funct3, opcodestr, Ext.Reg, Commutable>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -305,11 +311,11 @@ defm : FPFMADynFrmAlias_m<FNMSUB_S, "fnmsub.s", FINX>;
 defm : FPFMADynFrmAlias_m<FNMADD_S, "fnmadd.s", FINX>;
 
 let SchedRW = [WriteFALU32, ReadFALU32, ReadFALU32] in {
-defm FADD_S : FPALU_rr_frm_m<0b0000000, "fadd.s", FINX>;
+defm FADD_S : FPALU_rr_frm_m<0b0000000, "fadd.s", FINX, /*Commutable*/1>;
 defm FSUB_S : FPALU_rr_frm_m<0b0000100, "fsub.s", FINX>;
 }
 let SchedRW = [WriteFMul32, ReadFMul32, ReadFMul32] in
-defm FMUL_S : FPALU_rr_frm_m<0b0001000, "fmul.s", FINX>;
+defm FMUL_S : FPALU_rr_frm_m<0b0001000, "fmul.s", FINX, /*Commutable*/1>;
 
 let SchedRW = [WriteFDiv32, ReadFDiv32, ReadFDiv32] in
 defm FDIV_S : FPALU_rr_frm_m<0b0001100, "fdiv.s", FINX>;
@@ -331,8 +337,8 @@ defm FSGNJX_S : FPALU_rr_m<0b0010000, 0b010, "fsgnjx.s", FINX>;
 }
 
 let SchedRW = [WriteFMinMax32, ReadFMinMax32, ReadFMinMax32] in {
-defm FMIN_S   : FPALU_rr_m<0b0010100, 0b000, "fmin.s", FINX>;
-defm FMAX_S   : FPALU_rr_m<0b0010100, 0b001, "fmax.s", FINX>;
+defm FMIN_S   : FPALU_rr_m<0b0010100, 0b000, "fmin.s", FINX, /*Commutable*/1>;
+defm FMAX_S   : FPALU_rr_m<0b0010100, 0b001, "fmax.s", FINX, /*Commutable*/1>;
 }
 
 defm FCVT_W_S : FPUnaryOp_r_frm_m<0b1100000, 0b00000, XFINX, "fcvt.w.s">,
@@ -348,7 +354,7 @@ def FMV_X_W : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR32, "fmv.x.w">,
               Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>;
 
 let SchedRW = [WriteFCmp32, ReadFCmp32, ReadFCmp32] in {
-defm FEQ_S : FPCmp_rr_m<0b1010000, 0b010, "feq.s", FINX>;
+defm FEQ_S : FPCmp_rr_m<0b1010000, 0b010, "feq.s", FINX, /*Commutable*/1>;
 defm FLT_S : FPCmp_rr_m<0b1010000, 0b001, "flt.s", FINX>;
 defm FLE_S : FPCmp_rr_m<0b1010000, 0b000, "fle.s", FINX>;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index edaf158..835a0f5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -109,11 +109,11 @@ defm : FPFMADynFrmAlias_m<FNMSUB_H, "fnmsub.h", HINX>;
 defm : FPFMADynFrmAlias_m<FNMADD_H, "fnmadd.h", HINX>;
 
 let SchedRW = [WriteFALU16, ReadFALU16, ReadFALU16] in {
-defm FADD_H : FPALU_rr_frm_m<0b0000010, "fadd.h", HINX>;
+defm FADD_H : FPALU_rr_frm_m<0b0000010, "fadd.h", HINX, /*Commutable*/1>;
 defm FSUB_H : FPALU_rr_frm_m<0b0000110, "fsub.h", HINX>;
 }
 let SchedRW = [WriteFMul16, ReadFMul16, ReadFMul16] in
-defm FMUL_H : FPALU_rr_frm_m<0b0001010, "fmul.h", HINX>;
+defm FMUL_H : FPALU_rr_frm_m<0b0001010, "fmul.h", HINX, /*Commutable*/1>;
 
 let SchedRW = [WriteFDiv16, ReadFDiv16, ReadFDiv16] in
 defm FDIV_H : FPALU_rr_frm_m<0b0001110, "fdiv.h", HINX>;
@@ -135,8 +135,8 @@ defm FSGNJX_H : FPALU_rr_m<0b0010010, 0b010, "fsgnjx.h", HINX>;
 }
 
 let SchedRW = [WriteFMinMax16, ReadFMinMax16, ReadFMinMax16] in {
-defm FMIN_H   : FPALU_rr_m<0b0010110, 0b000, "fmin.h", HINX>;
-defm FMAX_H   : FPALU_rr_m<0b0010110, 0b001, "fmax.h", HINX>;
+defm FMIN_H   : FPALU_rr_m<0b0010110, 0b000, "fmin.h", HINX, /*Commutable*/1>;
+defm FMAX_H   : FPALU_rr_m<0b0010110, 0b001, "fmax.h", HINX, /*Commutable*/1>;
 }
 
 defm FCVT_W_H : FPUnaryOp_r_frm_m<0b1100010, 0b00000, XHINX, "fcvt.w.h">,
@@ -173,7 +173,7 @@ def FMV_H_X : FPUnaryOp_r<0b1111010, 0b00000, 0b000, FPR16, GPR, "fmv.h.x">,
 } // Predicates = [HasStdExtZfhOrZfhmin]
 
 let SchedRW = [WriteFCmp16, ReadFCmp16, ReadFCmp16] in {
-defm FEQ_H : FPCmp_rr_m<0b1010010, 0b010, "feq.h", HINX>;
+defm FEQ_H : FPCmp_rr_m<0b1010010, 0b010, "feq.h", HINX, /*Commutable*/1>;
 defm FLT_H : FPCmp_rr_m<0b1010010, 0b001, "flt.h", HINX>;
 defm FLE_H : FPCmp_rr_m<0b1010010, 0b000, "fle.h", HINX>;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
index c167c09..4b34bba 100644
--- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -87,7 +87,7 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
   return MCOperand::createExpr(ME);
 }
 
-bool llvm::LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
+bool llvm::lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
                                                MCOperand &MCOp,
                                                const AsmPrinter &AP) {
   switch (MO.getType()) {
@@ -214,7 +214,7 @@ bool llvm::lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
 
   for (const MachineOperand &MO : MI->operands()) {
     MCOperand MCOp;
-    if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
+    if (lowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
       OutMI.addOperand(MCOp);
   }
 
diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 4f37acc..60e1b05 100644
--- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -1590,9 +1590,11 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
   if (getParser().parseExpression(Expr))
     return MatchOperand_NoMatch;
 
-  auto isOutOfRangeConstant = [&](const MCExpr *E) -> bool {
+  auto isOutOfRangeConstant = [&](const MCExpr *E, bool Negate) -> bool {
     if (auto *CE = dyn_cast<MCConstantExpr>(E)) {
       int64_t Value = CE->getValue();
+      if (Negate)
+        Value = -Value;
       if ((Value & 1) || Value < MinVal || Value > MaxVal)
         return true;
     }
@@ -1606,7 +1608,7 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
       Error(StartLoc, "Expected PC-relative expression");
       return MatchOperand_ParseFail;
     }
-    if (isOutOfRangeConstant(CE)) {
+    if (isOutOfRangeConstant(CE, false)) {
       Error(StartLoc, "offset out of range");
       return MatchOperand_ParseFail;
     }
@@ -1621,8 +1623,9 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
   // For consistency with the GNU assembler, conservatively assume that a
   // constant offset must by itself be within the given size range.
   if (const auto *BE = dyn_cast<MCBinaryExpr>(Expr))
-    if (isOutOfRangeConstant(BE->getLHS()) ||
-        isOutOfRangeConstant(BE->getRHS())) {
+    if (isOutOfRangeConstant(BE->getLHS(), false) ||
+        isOutOfRangeConstant(BE->getRHS(),
+                             BE->getOpcode() == MCBinaryExpr::Sub)) {
       Error(StartLoc, "offset out of range");
       return MatchOperand_ParseFail;
     }
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index d825981..368b05e 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -48,14 +48,18 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
                      VEX, T8XD;
 
     // Pseduo instruction for RA.
+    let mayLoad = 1 in
     def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src),
                               [(int_x86_ldtilecfg_internal addr:$src)]>;
+    let mayLoad = 1 in
     def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                      GR16:$src2,
                                                      opaquemem:$src3), []>;
+    let mayLoad = 1 in
     def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                        GR16:$src2,
                                                        opaquemem:$src3), []>;
+    let mayStore = 1 in
     def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
                                             GR16:$src2, opaquemem:$src3,
                                             TILE:$src4), []>;
@@ -67,9 +71,12 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
     let usesCustomInserter = 1 in {
       // Pseudo instructions, using immediates instead of tile registers.
       // To be translated to the actual instructions in X86ISelLowering.cpp
+      let mayLoad = 1 in
       def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
+      let mayLoad = 1 in
       def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
                                           sibmem:$src2), []>;
+      let mayStore = 1 in
       def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
       def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
                               [(int_x86_tilezero timm:$src)]>;
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 0ad3e6c..81f258d 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -964,7 +964,7 @@ static void combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) {
 static bool combineLdSt(SmallVectorImpl<Instruction *> &Casts) {
   bool Change = false;
   for (auto *Cast : Casts) {
-    IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cast);
+    auto *II = cast<IntrinsicInst>(Cast);
     // %43 = call <256 x i32> @llvm.x86.cast.tile.to.vector(x86_amx %42)
     // store <256 x i32> %43, <256 x i32>* %p, align 64
     // -->
@@ -984,7 +984,7 @@ static bool combineLdSt(SmallVectorImpl<Instruction *> &Casts) {
         Store->eraseFromParent();
     } else { // x86_cast_vector_to_tile
       SmallVector<Instruction *, 2> DeadLoads;
-      LoadInst *Load = dyn_cast<LoadInst>(Cast->getOperand(0));
+      auto *Load = dyn_cast<LoadInst>(Cast->getOperand(0));
       if (!Load || !Load->hasOneUse())
         continue;
       // %65 = load <256 x i32>, <256 x i32>* %p, align 64
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 67ed1d8..05364e3 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -935,8 +935,7 @@ def SKLWriteResGroup58 : SchedWriteRes<[SKLPort23]> {
   let ResourceCycles = [1];
 }
 def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
-                                             "MOVZX(16|32|64)rm(8|16)",
-                                             "(V?)MOVDDUPrm")>; // TODO: Should this be SKLWriteResGroup67?
+                                             "MOVZX(16|32|64)rm(8|16)")>;
 
 def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> {
   let Latency = 5;
@@ -993,7 +992,8 @@ def: InstRW<[SKLWriteResGroup67], (instrs VBROADCASTSSrm,
                                           VPBROADCASTDrm,
                                           VPBROADCASTQrm)>;
 def: InstRW<[SKLWriteResGroup67], (instregex "(V?)MOVSHDUPrm",
-                                             "(V?)MOVSLDUPrm")>;
+                                             "(V?)MOVSLDUPrm",
+                                             "(V?)MOVDDUPrm")>;
 
 def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> {
   let Latency = 6;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 0189acd..b682b51 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -1055,8 +1055,7 @@ def SKXWriteResGroup58 : SchedWriteRes<[SKXPort23]> {
   let ResourceCycles = [1];
 }
 def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
-                                             "MOVZX(16|32|64)rm(8|16)",
-                                             "(V?)MOVDDUPrm")>;  // TODO: Should this be SKXWriteResGroup71?
+                                             "MOVZX(16|32|64)rm(8|16)")>;
 
 def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> {
   let Latency = 5;
@@ -1159,11 +1158,10 @@ def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> {
 }
 def: InstRW<[SKXWriteResGroup71], (instrs VBROADCASTSSrm,
                                           VPBROADCASTDrm,
-                                          VPBROADCASTQrm,
-                                          VMOVSHDUPrm,
-                                          VMOVSLDUPrm,
-                                          MOVSHDUPrm,
-                                          MOVSLDUPrm)>;
+                                          VPBROADCASTQrm)>;
+def: InstRW<[SKXWriteResGroup71], (instregex "(V?)MOVSHDUPrm",
+                                             "(V?)MOVSLDUPrm",
+                                             "(V?)MOVDDUPrm")>;
 
 def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
   let Latency = 6;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 2103169..5051d4c 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -485,12 +485,6 @@ defm : Zn2WriteResFpuPair<WriteFVarShuffle256, [Zn2FPU], 100>;
 def Zn2WriteMicrocoded : SchedWriteRes<[]> {
   let Latency = 100;
 }
-defm : Zn2WriteResPair<WriteDPPS, [], 15>;
-defm : Zn2WriteResPair<WriteFHAdd, [], 7>;
-defm : Zn2WriteResPair<WriteFHAddY, [], 7>;
-defm : Zn2WriteResPair<WritePHAdd, [], 3>;
-defm : Zn2WriteResPair<WritePHAddX, [], 3>;
-defm : Zn2WriteResPair<WritePHAddY, [], 3>;
 
 def : SchedAlias<WriteMicrocoded, Zn2WriteMicrocoded>;
 def : SchedAlias<WriteFCMOV, Zn2WriteMicrocoded>;
@@ -1108,6 +1102,14 @@ def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
 
 //-- Arithmetic instructions --//
 
+// HADD, HSUB PS/PD
+// PHADD|PHSUB (S) W/D.
+defm : Zn2WriteResPair<WriteFHAdd, [], 7>;
+defm : Zn2WriteResPair<WriteFHAddY, [], 7>;
+defm : Zn2WriteResPair<WritePHAdd, [], 3>;
+defm : Zn2WriteResPair<WritePHAddX, [], 3>;
+defm : Zn2WriteResPair<WritePHAddY, [], 3>;
+
 // PCMPGTQ.
 def Zn2WritePCMPGTQr : SchedWriteRes<[Zn2FPU03]>;
 def : InstRW<[Zn2WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
@@ -1478,6 +1480,7 @@ def : SchedAlias<WriteFDiv64YLd, Zn2WriteVDIVPDYLd>;
 
 // DPPS.
 // x,x,i / v,v,v,i.
+defm : Zn2WriteResPair<WriteDPPS, [], 15>;
 def : SchedAlias<WriteDPPSY,  Zn2WriteMicrocoded>;
 
 // x,m,i / v,v,m,i.
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 7f4ca3a..a0bde8dc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3833,10 +3833,21 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
     }
   }
 
-  // TODO: Use default extraction for now, but we should investigate extending this
-  // to handle repeated subvector extraction.
-  if (Extract)
+  if (Extract) {
+    // vXi1 can be efficiently extracted with MOVMSK.
+    // TODO: AVX512 predicate mask handling.
+    // NOTE: This doesn't work well for roundtrip scalarization.
+    if (!Insert && Ty->getScalarSizeInBits() == 1 && !ST->hasAVX512()) {
+      unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
+      unsigned MaxElts = ST->hasAVX2() ? 32 : 16;
+      unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
+      return MOVMSKCost;
+    }
+
+    // TODO: Use default extraction for now, but we should investigate extending
+    // this to handle repeated subvector extraction.
     Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, false, Extract);
+  }
 
   return Cost;
 }
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 65bc392..a33cb0b 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -218,10 +218,17 @@ static Function *doPromotion(
   LLVM_DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
                     << "From: " << *F);
 
+  uint64_t LargestVectorWidth = 0;
+  for (auto *I : Params)
+    if (auto *VT = dyn_cast<llvm::VectorType>(I))
+      LargestVectorWidth = std::max(
+          LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize());
+
   // Recompute the parameter attributes list based on the new arguments for
   // the function.
   NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                        PAL.getRetAttrs(), ArgAttrVec));
+  AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
   ArgAttrVec.clear();
 
   F->getParent()->getFunctionList().insert(F->getIterator(), NF);
@@ -313,6 +320,9 @@ static Function *doPromotion(
     Args.clear();
     ArgAttrVec.clear();
 
+    AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
+                                                  LargestVectorWidth);
+
     // Update the callgraph to know that the callsite has been transformed.
     if (ReplaceCallSite)
       (*ReplaceCallSite)(CB, *NewCS);
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 247da10..40dd6ee 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -2486,6 +2486,12 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
       }
     }
 
+    uint64_t LargestVectorWidth = 0;
+    for (auto *I : NewArgumentTypes)
+      if (auto *VT = dyn_cast<llvm::VectorType>(I))
+        LargestVectorWidth = std::max(
+            LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize());
+
     FunctionType *OldFnTy = OldFn->getFunctionType();
     Type *RetTy = OldFnTy->getReturnType();
 
@@ -2515,6 +2521,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
     NewFn->setAttributes(AttributeList::get(
         Ctx, OldFnAttributeList.getFnAttrs(), OldFnAttributeList.getRetAttrs(),
         NewArgumentAttributes));
+    AttributeFuncs::updateMinLegalVectorWidthAttr(*NewFn, LargestVectorWidth);
 
     // Since we have now created the new function, splice the body of the old
     // function right into the new function, leaving the old rotting hulk of the
@@ -2592,6 +2599,9 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
           Ctx, OldCallAttributeList.getFnAttrs(),
           OldCallAttributeList.getRetAttrs(), NewArgOperandAttributes));
 
+      AttributeFuncs::updateMinLegalVectorWidthAttr(*NewCB->getCaller(),
+                                                    LargestVectorWidth);
+
       CallSitePairs.push_back({OldCB, NewCB});
       return true;
     };
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 72b94cd..157cb27e 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/iterator_range.h"
@@ -904,7 +905,7 @@ OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
     }
   }
 
-  SmallPtrSet<Constant *, 1> RepValues;
+  SmallSetVector<Constant *, 1> RepValues;
   RepValues.insert(NewGV);
 
   // If there is a comparison against null, we will insert a global bool to
diff --git a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
index 6ec3c61..76f8f1a 100644
--- a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -29,7 +29,7 @@ static bool inferAllPrototypeAttributes(
     // explicitly visited by CGSCC passes in the new pass manager.)
     if (F.isDeclaration() && !F.hasOptNone()) {
       if (!F.hasFnAttribute(Attribute::NoBuiltin))
-        Changed |= inferLibFuncAttributes(F, GetTLI(F));
+        Changed |= inferNonMandatoryLibFuncAttrs(F, GetTLI(F));
       Changed |= inferAttributesFromOthers(F);
     }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 4a62afc..39a32e5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -759,7 +759,7 @@ getAsConstantIndexedAddress(Type *ElemTy, Value *V, const DataLayout &DL) {
         V = GEP->getOperand(0);
         Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
         Index = ConstantExpr::getAdd(
-            Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType));
+            Index, ConstantExpr::getSExtOrTrunc(GEPIndex, IndexType));
         continue;
       }
       break;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index ab35698..b044b8a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1298,6 +1298,8 @@ static Instruction *foldFDivPowDivisor(BinaryOperator &I,
 }
 
 Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
+  Module *M = I.getModule();
+
   if (Value *V = SimplifyFDivInst(I.getOperand(0), I.getOperand(1),
                                   I.getFastMathFlags(),
                                   SQ.getWithInstruction(&I)))
@@ -1363,8 +1365,8 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
         !IsTan && match(Op0, m_Intrinsic<Intrinsic::cos>(m_Value(X))) &&
                   match(Op1, m_Intrinsic<Intrinsic::sin>(m_Specific(X)));
 
-    if ((IsTan || IsCot) &&
-        hasFloatFn(&TLI, I.getType(), LibFunc_tan, LibFunc_tanf, LibFunc_tanl)) {
+    if ((IsTan || IsCot) && hasFloatFn(M, &TLI, I.getType(), LibFunc_tan,
+                                       LibFunc_tanf, LibFunc_tanl)) {
       IRBuilder<> B(&I);
       IRBuilder<>::FastMathFlagGuard FMFGuard(B);
       B.setFastMathFlags(I.getFastMathFlags());
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 0e65a44..780a446 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -32,6 +32,7 @@
 #include "llvm/IR/Value.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -408,6 +409,25 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
   if (Access.Addr->isSwiftError())
     return None;
 
+  // Peel off GEPs and BitCasts.
+  auto *Addr = Access.Addr->stripInBoundsOffsets();
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+    // Do not instrument PGO counter updates.
+    if (GV->hasSection()) {
+      StringRef SectionName = GV->getSection();
+      // Check if the global is in the PGO counters section.
+      auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
+      if (SectionName.endswith(
+              getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
+        return None;
+    }
+
+    // Do not instrument accesses to LLVM internal variables.
+    if (GV->getName().startswith("__llvm"))
+      return None;
+  }
+
   const DataLayout &DL = I->getModule()->getDataLayout();
   Access.TypeSize = DL.getTypeStoreSizeInBits(Access.AccessTy);
   return Access;
@@ -613,8 +633,6 @@ bool MemProfiler::instrumentFunction(Function &F) {
 
   initializeCallbacks(*F.getParent());
 
-  FunctionModified |= insertDynamicShadowAtFunctionEntry(F);
-
   SmallVector<Instruction *, 16> ToInstrument;
 
   // Fill the set of memory operations to instrument.
@@ -625,6 +643,15 @@ bool MemProfiler::instrumentFunction(Function &F) {
     }
   }
 
+  if (ToInstrument.empty()) {
+    LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
+                      << " " << F << "\n");
+
+    return FunctionModified;
+  }
+
+  FunctionModified |= insertDynamicShadowAtFunctionEntry(F);
+
   int NumInstrumented = 0;
   for (auto *Inst : ToInstrument) {
     if (ClDebugMin < 0 || ClDebugMax < 0 ||
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 5338032..2610ef1 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -774,12 +774,9 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
 
   unsigned NumOrigPreds = Preds.size();
   // We can only sink instructions through unconditional branches.
-  for (auto I = Preds.begin(); I != Preds.end();) {
-    if ((*I)->getTerminator()->getNumSuccessors() != 1)
-      I = Preds.erase(I);
-    else
-      ++I;
-  }
+  llvm::erase_if(Preds, [](BasicBlock *BB) {
+    return BB->getTerminator()->getNumSuccessors() != 1;
+  });
 
   LockstepReverseIterator LRI(Preds);
   SmallVector<SinkingInstructionCandidate, 4> Candidates;
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 11c756c..87202f7 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1100,6 +1100,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
     Value *StoredVal, Instruction *TheStore,
     SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
     const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {
+  Module *M = TheStore->getModule();
   Value *SplatValue = isBytewiseValue(StoredVal, *DL);
   Constant *PatternValue = nullptr;
 
@@ -1182,15 +1183,14 @@ bool LoopIdiomRecognize::processLoopStridedStore(
     NewCall = Builder.CreateMemSet(
         BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
         /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
-  } else {
+  } else if (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) {
     // Everything is emitted in default address space
     Type *Int8PtrTy = DestInt8PtrTy;
 
-    Module *M = TheStore->getModule();
     StringRef FuncName = "memset_pattern16";
-    FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
-                                                Int8PtrTy, Int8PtrTy, IntIdxTy);
-    inferLibFuncAttributes(M, FuncName, *TLI);
+    FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16,
+                            Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy);
+    inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI);
 
     // Otherwise we should form a memset_pattern16.  PatternValue is known to be
     // an constant array of 16-bytes.  Plop the value into a mergable global.
@@ -1201,7 +1201,9 @@ bool LoopIdiomRecognize::processLoopStridedStore(
     GV->setAlignment(Align(16));
     Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
     NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
-  }
+  } else
+    return Changed;
+
   NewCall->setDebugLoc(TheStore->getDebugLoc());
 
   if (MSSAU) {
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index cff8f51..344f89e 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -575,9 +575,11 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
     if (OpI->getType()->isVectorTy()) {
       Scattered[I] = scatter(&CI, OpI);
       assert(Scattered[I].size() == NumElems && "mismatched call operands");
+      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
+        Tys.push_back(OpI->getType()->getScalarType());
     } else {
       ScalarOperands[I] = OpI;
-      if (hasVectorIntrinsicOverloadedScalarOpd(ID, I))
+      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
         Tys.push_back(OpI->getType());
     }
   }
@@ -593,7 +595,7 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
     ScalarCallOps.clear();
 
     for (unsigned J = 0; J != NumArgs; ++J) {
-      if (hasVectorIntrinsicScalarOpd(ID, J))
+      if (isVectorIntrinsicWithScalarOpAtArg(ID, J))
         ScalarCallOps.push_back(ScalarOperands[J]);
       else
         ScalarCallOps.push_back(Scattered[J][Elem]);
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index b3f1229..14c1fed 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -204,13 +204,19 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
 /// branch on a single value.
 static void buildPartialUnswitchConditionalBranch(
     BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
-    BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze) {
+    BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
+    Instruction *I, AssumptionCache *AC, DominatorTree &DT) {
   IRBuilder<> IRB(&BB);
 
-  Value *Cond = Direction ? IRB.CreateOr(Invariants) :
-    IRB.CreateAnd(Invariants);
-  if (InsertFreeze)
-    Cond = IRB.CreateFreeze(Cond, Cond->getName() + ".fr");
+  SmallVector<Value *> FrozenInvariants;
+  for (Value *Inv : Invariants) {
+    if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
+      Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
+    FrozenInvariants.push_back(Inv);
+  }
+
+  Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
+                          : IRB.CreateAnd(FrozenInvariants);
   IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
                    Direction ? &NormalSucc : &UnswitchedSucc);
 }
@@ -572,10 +578,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
              " condition!");
     buildPartialUnswitchConditionalBranch(
         *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
-        FreezeLoopUnswitchCond && any_of(Invariants, [&](Value *C) {
-          return !isGuaranteedNotToBeUndefOrPoison(C, nullptr,
-                                                   OldPH->getTerminator(), &DT);
-        }));
+        FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
   }
 
   // Update the dominator tree with the added edge.
@@ -2318,11 +2321,9 @@ static void unswitchNontrivialInvariants(
       buildPartialInvariantUnswitchConditionalBranch(
           *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
     else {
-      buildPartialUnswitchConditionalBranch(
-          *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
-          InsertFreeze && any_of(Invariants, [&](Value *C) {
-            return !isGuaranteedNotToBeUndefOrPoison(C, &AC, BI, &DT);
-          }));
+      buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
+                                            *ClonedPH, *LoopPH, InsertFreeze,
+                                            BI, &AC, DT);
     }
     DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
 
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 1f4f1c9..40fd407 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -39,7 +39,6 @@ STATISTIC(NumInaccessibleMemOrArgMemOnly,
 STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
 STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
 STATISTIC(NumWriteOnlyArg, "Number of arguments inferred as writeonly");
-STATISTIC(NumExtArg, "Number of arguments inferred as signext/zeroext.");
 STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
 STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
 STATISTIC(NumNoUndef, "Number of function returns inferred as noundef returns");
@@ -147,16 +146,6 @@ static bool setOnlyWritesMemory(Function &F, unsigned ArgNo) {
   return true;
 }
 
-static bool setArgExtAttr(Function &F, unsigned ArgNo,
-                          const TargetLibraryInfo &TLI, bool Signed = true) {
-  Attribute::AttrKind ExtAttr = TLI.getExtAttrForI32Param(Signed);
-  if (ExtAttr == Attribute::None || F.hasParamAttribute(ArgNo, ExtAttr))
-    return false;
-  F.addParamAttr(ArgNo, ExtAttr);
-  ++NumExtArg;
-  return true;
-}
-
 static bool setRetNoUndef(Function &F) {
   if (!F.getReturnType()->isVoidTy() &&
       !F.hasRetAttribute(Attribute::NoUndef)) {
@@ -231,6 +220,13 @@ static bool setAlignedAllocParam(Function &F, unsigned ArgNo) {
   return true;
 }
 
+static bool setAllocatedPointerParam(Function &F, unsigned ArgNo) {
+  if (F.hasParamAttribute(ArgNo, Attribute::AllocatedPointer))
+    return false;
+  F.addParamAttr(ArgNo, Attribute::AllocatedPointer);
+  return true;
+}
+
 static bool setAllocSize(Function &F, unsigned ElemSizeArg,
                          Optional<unsigned> NumElemsArg) {
   if (F.hasFnAttribute(Attribute::AllocSize))
@@ -240,15 +236,23 @@ static bool setAllocSize(Function &F, unsigned ElemSizeArg,
   return true;
 }
 
-bool llvm::inferLibFuncAttributes(Module *M, StringRef Name,
-                                  const TargetLibraryInfo &TLI) {
+static bool setAllocFamily(Function &F, StringRef Family) {
+  if (F.hasFnAttribute("alloc-family"))
+    return false;
+  F.addFnAttr("alloc-family", Family);
+  return true;
+}
+
+bool llvm::inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name,
+                                         const TargetLibraryInfo &TLI) {
   Function *F = M->getFunction(Name);
   if (!F)
     return false;
-  return inferLibFuncAttributes(*F, TLI);
+  return inferNonMandatoryLibFuncAttrs(*F, TLI);
 }
 
-bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
+bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
+                                         const TargetLibraryInfo &TLI) {
   LibFunc TheLibFunc;
   if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc)))
     return false;
@@ -376,6 +380,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setArgNoUndef(F, 1);
     LLVM_FALLTHROUGH;
   case LibFunc_strdup:
+    Changed |= setAllocFamily(F, "malloc");
     Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
@@ -437,7 +442,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     LLVM_FALLTHROUGH;
   case LibFunc_valloc:
   case LibFunc_malloc:
+    Changed |= setAllocFamily(F, "malloc");
+    LLVM_FALLTHROUGH;
   case LibFunc_vec_malloc:
+    Changed |= setAllocFamily(F, "vec_malloc");
     Changed |= setAllocSize(F, 0, None);
     Changed |= setOnlyAccessesInaccessibleMemory(F);
     Changed |= setRetAndArgsNoUndef(F);
@@ -501,6 +509,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_memalign:
+    Changed |= setAllocFamily(F, "malloc");
     Changed |= setAllocSize(F, 1, None);
     Changed |= setAlignedAllocParam(F, 0);
     Changed |= setOnlyAccessesInaccessibleMemory(F);
@@ -522,8 +531,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_realloc:
-  case LibFunc_vec_realloc:
   case LibFunc_reallocf:
+    Changed |= setAllocFamily(F, "malloc");
+    LLVM_FALLTHROUGH;
+  case LibFunc_vec_realloc:
+    Changed |= setAllocFamily(F, "vec_malloc");
+    Changed |= setAllocatedPointerParam(F, 0);
     Changed |= setAllocSize(F, 1, None);
     Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
     Changed |= setRetNoUndef(F);
@@ -597,7 +610,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setOnlyWritesMemory(F, 0);
     return Changed;
   case LibFunc_calloc:
+    Changed |= setAllocFamily(F, "malloc");
+    LLVM_FALLTHROUGH;
   case LibFunc_vec_calloc:
+    Changed |= setAllocFamily(F, "vec_malloc");
     Changed |= setAllocSize(F, 0, 1);
     Changed |= setOnlyAccessesInaccessibleMemory(F);
     Changed |= setRetAndArgsNoUndef(F);
@@ -656,7 +672,11 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_free:
+    Changed |= setAllocFamily(F, "malloc");
+    LLVM_FALLTHROUGH;
   case LibFunc_vec_free:
+    Changed |= setAllocFamily(F, "vec_malloc");
+    Changed |= setAllocatedPointerParam(F, 0);
     Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
     Changed |= setArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
@@ -845,7 +865,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_putchar:
   case LibFunc_putchar_unlocked:
     Changed |= setRetAndArgsNoUndef(F);
-    Changed |= setArgExtAttr(F, 0, TLI);
     Changed |= setDoesNotThrow(F);
     return Changed;
   case LibFunc_popen:
@@ -1066,7 +1085,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_ldexp:
   case LibFunc_ldexpf:
   case LibFunc_ldexpl:
-    Changed |= setArgExtAttr(F, 1, TLI);
     Changed |= setWillReturn(F);
     return Changed;
   case LibFunc_abs:
@@ -1203,34 +1221,141 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   }
 }
 
-bool llvm::hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+static void setArgExtAttr(Function &F, unsigned ArgNo,
+                          const TargetLibraryInfo &TLI, bool Signed = true) {
+  Attribute::AttrKind ExtAttr = TLI.getExtAttrForI32Param(Signed);
+  if (ExtAttr != Attribute::None && !F.hasParamAttribute(ArgNo, ExtAttr))
+    F.addParamAttr(ArgNo, ExtAttr);
+}
+
+FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+                                        LibFunc TheLibFunc, FunctionType *T,
+                                        AttributeList AttributeList) {
+  assert(TLI.has(TheLibFunc) &&
+         "Creating call to non-existing library function.");
+  StringRef Name = TLI.getName(TheLibFunc);
+  FunctionCallee C = M->getOrInsertFunction(Name, T, AttributeList);
+
+  // Make sure any mandatory argument attributes are added.
+
+  // Any outgoing i32 argument should be handled with setArgExtAttr() which
+  // will add an extension attribute if the target ABI requires it. Adding
+  // argument extensions is typically done by the front end but when an
+  // optimizer is building a library call on its own it has to take care of
+  // this. Each such generated function must be handled here with sign or
+  // zero extensions as needed.  F is retreived with cast<> because we demand
+  // of the caller to have called isLibFuncEmittable() first.
+  Function *F = cast<Function>(C.getCallee());
+  assert(F->getFunctionType() == T && "Function type does not match.");
+  switch (TheLibFunc) {
+  case LibFunc_fputc:
+  case LibFunc_putchar:
+    setArgExtAttr(*F, 0, TLI);
+    break;
+  case LibFunc_ldexp:
+  case LibFunc_ldexpf:
+  case LibFunc_ldexpl:
+  case LibFunc_memchr:
+  case LibFunc_strchr:
+    setArgExtAttr(*F, 1, TLI);
+    break;
+  case LibFunc_memccpy:
+    setArgExtAttr(*F, 2, TLI);
+    break;
+
+    // These are functions that are known to not need any argument extension
+    // on any target: A size_t argument (which may be an i32 on some targets)
+    // should not trigger the assert below.
+  case LibFunc_bcmp:
+  case LibFunc_calloc:
+  case LibFunc_fwrite:
+  case LibFunc_malloc:
+  case LibFunc_memcmp:
+  case LibFunc_memcpy_chk:
+  case LibFunc_mempcpy:
+  case LibFunc_memset_pattern16:
+  case LibFunc_snprintf:
+  case LibFunc_stpncpy:
+  case LibFunc_strlcat:
+  case LibFunc_strlcpy:
+  case LibFunc_strncat:
+  case LibFunc_strncmp:
+  case LibFunc_strncpy:
+  case LibFunc_vsnprintf:
+    break;
+
+  default:
+#ifndef NDEBUG
+    for (unsigned i = 0; i < T->getNumParams(); i++)
+      assert(!isa<IntegerType>(T->getParamType(i)) &&
+             "Unhandled integer argument.");
+#endif
+    break;
+  }
+
+  return C;
+}
+
+FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+                                        LibFunc TheLibFunc, FunctionType *T) {
+  return getOrInsertLibFunc(M, TLI, TheLibFunc, T, AttributeList());
+}
+
+bool llvm::isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
+                              LibFunc TheLibFunc) {
+  StringRef FuncName = TLI->getName(TheLibFunc);
+  if (!TLI->has(TheLibFunc))
+    return false;
+
+  // Check if the Module already has a GlobalValue with the same name, in
+  // which case it must be a Function with the expected type.
+  if (GlobalValue *GV = M->getNamedValue(FuncName)) {
+    if (auto *F = dyn_cast<Function>(GV))
+      return TLI->isValidProtoForLibFunc(*F->getFunctionType(), TheLibFunc, *M);
+    return false;
+  }
+
+  return true;
+}
+
+bool llvm::isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
+                              StringRef Name) {
+  LibFunc TheLibFunc;
+  return TLI->getLibFunc(Name, TheLibFunc) &&
+         isLibFuncEmittable(M, TLI, TheLibFunc);
+}
+
+bool llvm::hasFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty,
                       LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn) {
   switch (Ty->getTypeID()) {
   case Type::HalfTyID:
     return false;
   case Type::FloatTyID:
-    return TLI->has(FloatFn);
+    return isLibFuncEmittable(M, TLI, FloatFn);
   case Type::DoubleTyID:
-    return TLI->has(DoubleFn);
+    return isLibFuncEmittable(M, TLI, DoubleFn);
   default:
-    return TLI->has(LongDoubleFn);
+    return isLibFuncEmittable(M, TLI, LongDoubleFn);
   }
 }
 
-StringRef llvm::getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty,
-                               LibFunc DoubleFn, LibFunc FloatFn,
-                               LibFunc LongDoubleFn) {
-  assert(hasFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
+StringRef llvm::getFloatFn(const Module *M, const TargetLibraryInfo *TLI,
+                           Type *Ty, LibFunc DoubleFn, LibFunc FloatFn,
+                           LibFunc LongDoubleFn, LibFunc &TheLibFunc) {
+  assert(hasFloatFn(M, TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
          "Cannot get name for unavailable function!");
 
   switch (Ty->getTypeID()) {
   case Type::HalfTyID:
     llvm_unreachable("No name for HalfTy!");
   case Type::FloatTyID:
+    TheLibFunc = FloatFn;
     return TLI->getName(FloatFn);
   case Type::DoubleTyID:
+    TheLibFunc = DoubleFn;
     return TLI->getName(DoubleFn);
   default:
+    TheLibFunc = LongDoubleFn;
     return TLI->getName(LongDoubleFn);
   }
 }
@@ -1247,14 +1372,14 @@ static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
                           ArrayRef<Value *> Operands, IRBuilderBase &B,
                           const TargetLibraryInfo *TLI,
                           bool IsVaArgs = false) {
-  if (!TLI->has(TheLibFunc))
+  Module *M = B.GetInsertBlock()->getModule();
+  if (!isLibFuncEmittable(M, TLI, TheLibFunc))
     return nullptr;
 
-  Module *M = B.GetInsertBlock()->getModule();
   StringRef FuncName = TLI->getName(TheLibFunc);
   FunctionType *FuncType = FunctionType::get(ReturnType, ParamTypes, IsVaArgs);
-  FunctionCallee Callee = M->getOrInsertFunction(FuncName, FuncType);
-  inferLibFuncAttributes(M, FuncName, *TLI);
+  FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, FuncType);
+  inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI);
   CallInst *CI = B.CreateCall(Callee, Operands, FuncName);
   if (const Function *F =
           dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
@@ -1323,16 +1448,16 @@ Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
 Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
                            IRBuilderBase &B, const DataLayout &DL,
                            const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_memcpy_chk))
+  Module *M = B.GetInsertBlock()->getModule();
+  if (!isLibFuncEmittable(M, TLI, LibFunc_memcpy_chk))
     return nullptr;
 
-  Module *M = B.GetInsertBlock()->getModule();
   AttributeList AS;
   AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
                           Attribute::NoUnwind);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  FunctionCallee MemCpy = M->getOrInsertFunction(
-      "__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(),
+  FunctionCallee MemCpy = getOrInsertLibFunc(M, *TLI, LibFunc_memcpy_chk,
+      AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(),
       B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
       DL.getIntPtrType(Context));
   Dst = castToCStr(Dst, B);
@@ -1466,14 +1591,15 @@ static void appendTypeSuffix(Value *Op, StringRef &Name,
   }
 }
 
-static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
-                                         IRBuilderBase &B,
-                                         const AttributeList &Attrs) {
+static Value *emitUnaryFloatFnCallHelper(Value *Op, LibFunc TheLibFunc,
+                                         StringRef Name, IRBuilderBase &B,
+                                         const AttributeList &Attrs,
+                                         const TargetLibraryInfo *TLI) {
   assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall");
 
   Module *M = B.GetInsertBlock()->getModule();
-  FunctionCallee Callee =
-      M->getOrInsertFunction(Name, Op->getType(), Op->getType());
+  FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, Op->getType(),
+                                             Op->getType());
   CallInst *CI = B.CreateCall(Callee, Op, Name);
 
   // The incoming attribute set may have come from a speculatable intrinsic, but
@@ -1488,12 +1614,16 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
   return CI;
 }
 
-Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilderBase &B,
+Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
+                                  StringRef Name, IRBuilderBase &B,
                                   const AttributeList &Attrs) {
   SmallString<20> NameBuffer;
   appendTypeSuffix(Op, Name, NameBuffer);
 
-  return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
+  LibFunc TheLibFunc;
+  TLI->getLibFunc(Name, TheLibFunc);
+
+  return emitUnaryFloatFnCallHelper(Op, TheLibFunc, Name, B, Attrs, TLI);
 }
 
 Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
@@ -1501,23 +1631,25 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
                                   LibFunc LongDoubleFn, IRBuilderBase &B,
                                   const AttributeList &Attrs) {
   // Get the name of the function according to TLI.
-  StringRef Name = getFloatFnName(TLI, Op->getType(),
-                                  DoubleFn, FloatFn, LongDoubleFn);
+  Module *M = B.GetInsertBlock()->getModule();
+  LibFunc TheLibFunc;
+  StringRef Name = getFloatFn(M, TLI, Op->getType(), DoubleFn, FloatFn,
+                              LongDoubleFn, TheLibFunc);
 
-  return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
+  return emitUnaryFloatFnCallHelper(Op, TheLibFunc, Name, B, Attrs, TLI);
 }
 
 static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
+                                          LibFunc TheLibFunc,
                                           StringRef Name, IRBuilderBase &B,
                                           const AttributeList &Attrs,
-                                          const TargetLibraryInfo *TLI = nullptr) {
+                                          const TargetLibraryInfo *TLI) {
   assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
 
   Module *M = B.GetInsertBlock()->getModule();
-  FunctionCallee Callee = M->getOrInsertFunction(Name, Op1->getType(),
-                                                 Op1->getType(), Op2->getType());
-  if (TLI != nullptr)
-    inferLibFuncAttributes(M, Name, *TLI);
+  FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, Op1->getType(),
+                                             Op1->getType(), Op2->getType());
+  inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
   CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name);
 
   // The incoming attribute set may have come from a speculatable intrinsic, but
@@ -1532,15 +1664,19 @@ static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
   return CI;
 }
 
-Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
-                                   IRBuilderBase &B,
+Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
+                                   const TargetLibraryInfo *TLI,
+                                   StringRef Name, IRBuilderBase &B,
                                    const AttributeList &Attrs) {
   assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
 
   SmallString<20> NameBuffer;
   appendTypeSuffix(Op1, Name, NameBuffer);
 
-  return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs);
+  LibFunc TheLibFunc;
+  TLI->getLibFunc(Name, TheLibFunc);
+
+  return emitBinaryFloatFnCallHelper(Op1, Op2, TheLibFunc, Name, B, Attrs, TLI);
 }
 
 Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
@@ -1549,22 +1685,24 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
                                    LibFunc LongDoubleFn, IRBuilderBase &B,
                                    const AttributeList &Attrs) {
   // Get the name of the function according to TLI.
-  StringRef Name = getFloatFnName(TLI, Op1->getType(),
-                                  DoubleFn, FloatFn, LongDoubleFn);
+  Module *M = B.GetInsertBlock()->getModule();
+  LibFunc TheLibFunc;
+  StringRef Name = getFloatFn(M, TLI, Op1->getType(), DoubleFn, FloatFn,
+                              LongDoubleFn, TheLibFunc);
 
-  return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs, TLI);
+  return emitBinaryFloatFnCallHelper(Op1, Op2, TheLibFunc, Name, B, Attrs, TLI);
 }
 
 Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B,
                          const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_putchar))
+  Module *M = B.GetInsertBlock()->getModule();
+  if (!isLibFuncEmittable(M, TLI, LibFunc_putchar))
     return nullptr;
 
-  Module *M = B.GetInsertBlock()->getModule();
   StringRef PutCharName = TLI->getName(LibFunc_putchar);
-  FunctionCallee PutChar =
-      M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty());
-  inferLibFuncAttributes(M, PutCharName, *TLI);
+  FunctionCallee PutChar = getOrInsertLibFunc(M, *TLI, LibFunc_putchar,
+                                              B.getInt32Ty(), B.getInt32Ty());
+  inferNonMandatoryLibFuncAttrs(M, PutCharName, *TLI);
   CallInst *CI = B.CreateCall(PutChar,
                               B.CreateIntCast(Char,
                               B.getInt32Ty(),
@@ -1580,14 +1718,14 @@ Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B,
 
 Value *llvm::emitPutS(Value *Str, IRBuilderBase &B,
                       const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_puts))
+  Module *M = B.GetInsertBlock()->getModule();
+  if (!isLibFuncEmittable(M, TLI, LibFunc_puts))
     return nullptr;
 
-  Module *M = B.GetInsertBlock()->getModule();
   StringRef PutsName = TLI->getName(LibFunc_puts);
-  FunctionCallee PutS =
-      M->getOrInsertFunction(PutsName, B.getInt32Ty(), B.getInt8PtrTy());
-  inferLibFuncAttributes(M, PutsName, *TLI);
+  FunctionCallee PutS = getOrInsertLibFunc(M, *TLI, LibFunc_puts, B.getInt32Ty(),
+                                           B.getInt8PtrTy());
+  inferNonMandatoryLibFuncAttrs(M, PutsName, *TLI);
   CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName);
   if (const Function *F =
           dyn_cast<Function>(PutS.getCallee()->stripPointerCasts()))
@@ -1597,15 +1735,15 @@ Value *llvm::emitPutS(Value *Str, IRBuilderBase &B,
 
 Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilderBase &B,
                        const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_fputc))
+  Module *M = B.GetInsertBlock()->getModule();
+  if (!isLibFuncEmittable(M, TLI, LibFunc_fputc))
     return nullptr;
 
-  Module *M = B.GetInsertBlock()->getModule();
   StringRef FPutcName = TLI->getName(LibFunc_fputc);
-  FunctionCallee F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(),
-                                            B.getInt32Ty(), File->getType());
+  FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputc, B.getInt32Ty(),
+                                        B.getInt32Ty(), File->getType());
   if (File->getType()->isPointerTy())
-    inferLibFuncAttributes(M, FPutcName, *TLI);
+    inferNonMandatoryLibFuncAttrs(M, FPutcName, *TLI);
   Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
                          "chari");
   CallInst *CI = B.CreateCall(F, {Char, File}, FPutcName);
@@ -1618,15 +1756,15 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilderBase &B,
 
 Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B,
                        const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_fputs))
+  Module *M = B.GetInsertBlock()->getModule();
+  if (!isLibFuncEmittable(M, TLI, LibFunc_fputs))
     return nullptr;
 
-  Module *M = B.GetInsertBlock()->getModule();
   StringRef FPutsName = TLI->getName(LibFunc_fputs);
-  FunctionCallee F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
-                                            B.getInt8PtrTy(), File->getType());
+  FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputs, B.getInt32Ty(),
+                                        B.getInt8PtrTy(), File->getType());
   if (File->getType()->isPointerTy())
-    inferLibFuncAttributes(M, FPutsName, *TLI);
+    inferNonMandatoryLibFuncAttrs(M, FPutsName, *TLI);
   CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName);
 
   if (const Function *Fn =
@@ -1637,18 +1775,18 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B,
 
 Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_fwrite))
+  Module *M = B.GetInsertBlock()->getModule();
+  if (!isLibFuncEmittable(M, TLI, LibFunc_fwrite))
     return nullptr;
 
-  Module *M = B.GetInsertBlock()->getModule();
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   StringRef FWriteName = TLI->getName(LibFunc_fwrite);
-  FunctionCallee F = M->getOrInsertFunction(
-      FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
-      DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
+  FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fwrite,
+       DL.getIntPtrType(Context), B.getInt8PtrTy(), DL.getIntPtrType(Context),
+       DL.getIntPtrType(Context), File->getType());
 
   if (File->getType()->isPointerTy())
-    inferLibFuncAttributes(M, FWriteName, *TLI);
+    inferNonMandatoryLibFuncAttrs(M, FWriteName, *TLI);
   CallInst *CI =
       B.CreateCall(F, {castToCStr(Ptr, B), Size,
                        ConstantInt::get(DL.getIntPtrType(Context), 1), File});
@@ -1661,15 +1799,15 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B,
 
 Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL,
                         const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_malloc))
+  Module *M = B.GetInsertBlock()->getModule();
+  if (!isLibFuncEmittable(M, TLI, LibFunc_malloc))
     return nullptr;
 
-  Module *M = B.GetInsertBlock()->getModule();
   StringRef MallocName = TLI->getName(LibFunc_malloc);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  FunctionCallee Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(),
-                                                 DL.getIntPtrType(Context));
-  inferLibFuncAttributes(M, MallocName, *TLI);
+  FunctionCallee Malloc = getOrInsertLibFunc(M, *TLI, LibFunc_malloc,
+                                 B.getInt8PtrTy(), DL.getIntPtrType(Context));
+  inferNonMandatoryLibFuncAttrs(M, MallocName, *TLI);
   CallInst *CI = B.CreateCall(Malloc, Num, MallocName);
 
   if (const Function *F =
@@ -1681,16 +1819,16 @@ Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL,
 
 Value *llvm::emitCalloc(Value *Num, Value *Size, IRBuilderBase &B,
                         const TargetLibraryInfo &TLI) {
-  if (!TLI.has(LibFunc_calloc))
+  Module *M = B.GetInsertBlock()->getModule();
+  if (!isLibFuncEmittable(M, &TLI, LibFunc_calloc))
     return nullptr;
 
-  Module *M = B.GetInsertBlock()->getModule();
   StringRef CallocName = TLI.getName(LibFunc_calloc);
   const DataLayout &DL = M->getDataLayout();
   IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
-  FunctionCallee Calloc =
-      M->getOrInsertFunction(CallocName, B.getInt8PtrTy(), PtrType, PtrType);
-  inferLibFuncAttributes(M, CallocName, TLI);
+  FunctionCallee Calloc = getOrInsertLibFunc(M, TLI, LibFunc_calloc,
+                                             B.getInt8PtrTy(), PtrType, PtrType);
+  inferNonMandatoryLibFuncAttrs(M, CallocName, TLI);
   CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName);
 
   if (const auto *F =
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 7a9a272..e72e3ce 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -500,6 +500,13 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
     if (isMathLibCallNoop(Call, TLI))
       return true;
 
+  // Non-volatile atomic loads from constants can be removed.
+  if (auto *LI = dyn_cast<LoadInst>(I))
+    if (auto *GV = dyn_cast<GlobalVariable>(
+            LI->getPointerOperand()->stripPointerCasts()))
+      if (!LI->isVolatile() && GV->isConstant())
+        return true;
+
   return false;
 }
 
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 38dca39..0710511 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1190,13 +1190,15 @@ Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
 }
 
 Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilderBase &B) {
+  Module *M = CI->getModule();
   if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
     return V;
 
   // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
   // bcmp can be more efficient than memcmp because it only has to know that
   // there is a difference, not how different one is to the other.
-  if (TLI->has(LibFunc_bcmp) && isOnlyUsedInZeroEqualityComparison(CI)) {
+  if (isLibFuncEmittable(M, TLI, LibFunc_bcmp) &&
+      isOnlyUsedInZeroEqualityComparison(CI)) {
     Value *LHS = CI->getArgOperand(0);
     Value *RHS = CI->getArgOperand(1);
     Value *Size = CI->getArgOperand(2);
@@ -1360,7 +1362,8 @@ static Value *valueHasFloatPrecision(Value *Val) {
 
 /// Shrink double -> float functions.
 static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B,
-                               bool isBinary, bool isPrecise = false) {
+                               bool isBinary, const TargetLibraryInfo *TLI,
+                               bool isPrecise = false) {
   Function *CalleeFn = CI->getCalledFunction();
   if (!CI->getType()->isDoubleTy() || !CalleeFn)
     return nullptr;
@@ -1410,22 +1413,25 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B,
     R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
   } else {
     AttributeList CalleeAttrs = CalleeFn->getAttributes();
-    R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeName, B, CalleeAttrs)
-                 : emitUnaryFloatFnCall(V[0], CalleeName, B, CalleeAttrs);
+    R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], TLI, CalleeName, B,
+                                         CalleeAttrs)
+                 : emitUnaryFloatFnCall(V[0], TLI, CalleeName, B, CalleeAttrs);
   }
   return B.CreateFPExt(R, B.getDoubleTy());
 }
 
 /// Shrink double -> float for unary functions.
 static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilderBase &B,
+                                    const TargetLibraryInfo *TLI,
                                     bool isPrecise = false) {
-  return optimizeDoubleFP(CI, B, false, isPrecise);
+  return optimizeDoubleFP(CI, B, false, TLI, isPrecise);
 }
 
 /// Shrink double -> float for binary functions.
 static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilderBase &B,
+                                     const TargetLibraryInfo *TLI,
                                      bool isPrecise = false) {
-  return optimizeDoubleFP(CI, B, true, isPrecise);
+  return optimizeDoubleFP(CI, B, true, TLI, isPrecise);
 }
 
 // cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
@@ -1541,6 +1547,7 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
 /// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x);
 /// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x).
 Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
+  Module *M = Pow->getModule();
   Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
   AttributeList Attrs; // Attributes are only meaningful on the original call
   Module *Mod = Pow->getModule();
@@ -1568,7 +1575,8 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
 
     Function *CalleeFn = BaseFn->getCalledFunction();
     if (CalleeFn &&
-        TLI->getLibFunc(CalleeFn->getName(), LibFn) && TLI->has(LibFn)) {
+        TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
+        isLibFuncEmittable(M, TLI, LibFn)) {
       StringRef ExpName;
       Intrinsic::ID ID;
       Value *ExpFn;
@@ -1620,7 +1628,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
   // pow(2.0, itofp(x)) -> ldexp(1.0, x)
   if (match(Base, m_SpecificFP(2.0)) &&
       (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
-      hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
+      hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
     if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
       return copyFlags(*Pow,
                        emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI,
@@ -1629,7 +1637,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
   }
 
   // pow(2.0 ** n, x) -> exp2(n * x)
-  if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
+  if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
     APFloat BaseR = APFloat(1.0);
     BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
     BaseR = BaseR / *BaseF;
@@ -1656,7 +1664,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
   // pow(10.0, x) -> exp10(x)
   // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
   if (match(Base, m_SpecificFP(10.0)) &&
-      hasFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+      hasFloatFn(M, TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
     return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10,
                                                 LibFunc_exp10f, LibFunc_exp10l,
                                                 B, Attrs));
@@ -1681,7 +1689,8 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
         return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration(
                                                 Mod, Intrinsic::exp2, Ty),
                                             FMul, "exp2"));
-      else if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l))
+      else if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
+                          LibFunc_exp2l))
         return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
                                                     LibFunc_exp2f,
                                                     LibFunc_exp2l, B, Attrs));
@@ -1702,7 +1711,8 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
   }
 
   // Otherwise, use the libcall for sqrt().
-  if (hasFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl))
+  if (hasFloatFn(M, TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
+                 LibFunc_sqrtl))
     // TODO: We also should check that the target can in fact lower the sqrt()
     // libcall. We currently have no way to ask this question, so we ask if
     // the target has a sqrt() libcall, which is not exactly the same.
@@ -1892,8 +1902,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
   // Shrink pow() to powf() if the arguments are single precision,
   // unless the result is expected to be double precision.
   if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
-      hasFloatVersion(Name)) {
-    if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, true))
+      hasFloatVersion(M, Name)) {
+    if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, TLI, true))
       return Shrunk;
   }
 
@@ -1901,13 +1911,14 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
 }
 
 Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
+  Module *M = CI->getModule();
   Function *Callee = CI->getCalledFunction();
   AttributeList Attrs; // Attributes are only meaningful on the original call
   StringRef Name = Callee->getName();
   Value *Ret = nullptr;
   if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) &&
-      hasFloatVersion(Name))
-    Ret = optimizeUnaryDoubleFP(CI, B, true);
+      hasFloatVersion(M, Name))
+    Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
 
   Type *Ty = CI->getType();
   Value *Op = CI->getArgOperand(0);
@@ -1915,7 +1926,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
   // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= IntSize
   // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x))  if sizeof(x) < IntSize
   if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) &&
-      hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
+      hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
     if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize()))
       return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI,
                                    LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl,
@@ -1926,12 +1937,14 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
 }
 
 Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) {
+  Module *M = CI->getModule();
+
   // If we can shrink the call to a float function rather than a double
   // function, do that first.
   Function *Callee = CI->getCalledFunction();
   StringRef Name = Callee->getName();
-  if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
-    if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
+  if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(M, Name))
+    if (Value *Ret = optimizeBinaryDoubleFP(CI, B, TLI))
       return Ret;
 
   // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
@@ -1962,8 +1975,8 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
   Type *Ty = Log->getType();
   Value *Ret = nullptr;
 
-  if (UnsafeFPShrink && hasFloatVersion(LogNm))
-    Ret = optimizeUnaryDoubleFP(Log, B, true);
+  if (UnsafeFPShrink && hasFloatVersion(Mod, LogNm))
+    Ret = optimizeUnaryDoubleFP(Log, B, TLI, true);
 
   // The earlier call must also be 'fast' in order to do these transforms.
   CallInst *Arg = dyn_cast<CallInst>(Log->getArgOperand(0));
@@ -2071,7 +2084,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
         Log->doesNotAccessMemory()
             ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
                            Arg->getOperand(0), "log")
-            : emitUnaryFloatFnCall(Arg->getOperand(0), LogNm, B, Attrs);
+            : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, Attrs);
     Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul");
     // Since pow() may have side effects, e.g. errno,
     // dead code elimination may not be trusted to remove it.
@@ -2094,7 +2107,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
     Value *LogE = Log->doesNotAccessMemory()
                       ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
                                      Eul, "log")
-                      : emitUnaryFloatFnCall(Eul, LogNm, B, Attrs);
+                      : emitUnaryFloatFnCall(Eul, TLI, LogNm, B, Attrs);
     Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul");
     // Since exp() may have side effects, e.g. errno,
     // dead code elimination may not be trusted to remove it.
@@ -2106,14 +2119,16 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
 }
 
 Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
+  Module *M = CI->getModule();
   Function *Callee = CI->getCalledFunction();
   Value *Ret = nullptr;
   // TODO: Once we have a way (other than checking for the existince of the
   // libcall) to tell whether our target can lower @llvm.sqrt, relax the
   // condition below.
-  if (TLI->has(LibFunc_sqrtf) && (Callee->getName() == "sqrt" ||
-                                  Callee->getIntrinsicID() == Intrinsic::sqrt))
-    Ret = optimizeUnaryDoubleFP(CI, B, true);
+  if (isLibFuncEmittable(M, TLI, LibFunc_sqrtf) &&
+      (Callee->getName() == "sqrt" ||
+       Callee->getIntrinsicID() == Intrinsic::sqrt))
+    Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
 
   if (!CI->isFast())
     return Ret;
@@ -2158,7 +2173,6 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
 
   // If we found a repeated factor, hoist it out of the square root and
   // replace it with the fabs of that factor.
-  Module *M = Callee->getParent();
   Type *ArgType = I->getType();
   Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
   Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
@@ -2175,11 +2189,12 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
 
 // TODO: Generalize to handle any trig function and its inverse.
 Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilderBase &B) {
+  Module *M = CI->getModule();
   Function *Callee = CI->getCalledFunction();
   Value *Ret = nullptr;
   StringRef Name = Callee->getName();
-  if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name))
-    Ret = optimizeUnaryDoubleFP(CI, B, true);
+  if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(M, Name))
+    Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
 
   Value *Op1 = CI->getArgOperand(0);
   auto *OpC = dyn_cast<CallInst>(Op1);
@@ -2195,7 +2210,8 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilderBase &B) {
   // tanl(atanl(x)) -> x
   LibFunc Func;
   Function *F = OpC->getCalledFunction();
-  if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
+  if (F && TLI->getLibFunc(F->getName(), Func) &&
+      isLibFuncEmittable(M, TLI, Func) &&
       ((Func == LibFunc_atan && Callee->getName() == "tan") ||
        (Func == LibFunc_atanf && Callee->getName() == "tanf") ||
        (Func == LibFunc_atanl && Callee->getName() == "tanl")))
@@ -2211,9 +2227,10 @@ static bool isTrigLibCall(CallInst *CI) {
          CI->hasFnAttr(Attribute::ReadNone);
 }
 
-static void insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
+static bool insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
                              bool UseFloat, Value *&Sin, Value *&Cos,
-                             Value *&SinCos) {
+                             Value *&SinCos, const TargetLibraryInfo *TLI) {
+  Module *M = OrigCallee->getParent();
   Type *ArgTy = Arg->getType();
   Type *ResTy;
   StringRef Name;
@@ -2233,9 +2250,12 @@ static void insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
     ResTy = StructType::get(ArgTy, ArgTy);
   }
 
-  Module *M = OrigCallee->getParent();
-  FunctionCallee Callee =
-      M->getOrInsertFunction(Name, OrigCallee->getAttributes(), ResTy, ArgTy);
+  if (!isLibFuncEmittable(M, TLI, Name))
+    return false;
+  LibFunc TheLibFunc;
+  TLI->getLibFunc(Name, TheLibFunc);
+  FunctionCallee Callee = getOrInsertLibFunc(
+      M, *TLI, TheLibFunc, OrigCallee->getAttributes(), ResTy, ArgTy);
 
   if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
     // If the argument is an instruction, it must dominate all uses so put our
@@ -2259,6 +2279,8 @@ static void insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
     Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
                                  "cospi");
   }
+
+  return true;
 }
 
 Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {
@@ -2286,7 +2308,9 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {
     return nullptr;
 
   Value *Sin, *Cos, *SinCos;
-  insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, SinCos);
+  if (!insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
+                        SinCos, TLI))
+    return nullptr;
 
   auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls,
                                  Value *Res) {
@@ -2307,6 +2331,7 @@ void LibCallSimplifier::classifyArgUse(
     SmallVectorImpl<CallInst *> &CosCalls,
     SmallVectorImpl<CallInst *> &SinCosCalls) {
   CallInst *CI = dyn_cast<CallInst>(Val);
+  Module *M = CI->getModule();
 
   if (!CI || CI->use_empty())
     return;
@@ -2317,7 +2342,8 @@ void LibCallSimplifier::classifyArgUse(
 
   Function *Callee = CI->getCalledFunction();
   LibFunc Func;
-  if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) ||
+  if (!Callee || !TLI->getLibFunc(*Callee, Func) ||
+      !isLibFuncEmittable(M, TLI, Func) ||
       !isTrigLibCall(CI))
     return;
 
@@ -2532,6 +2558,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
 
 Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
 
+  Module *M = CI->getModule();
   Function *Callee = CI->getCalledFunction();
   FunctionType *FT = Callee->getFunctionType();
   if (Value *V = optimizePrintFString(CI, B)) {
@@ -2540,10 +2567,10 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
 
   // printf(format, ...) -> iprintf(format, ...) if no floating point
   // arguments.
-  if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) {
-    Module *M = B.GetInsertBlock()->getParent()->getParent();
-    FunctionCallee IPrintFFn =
-        M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+  if (isLibFuncEmittable(M, TLI, LibFunc_iprintf) &&
+      !callHasFloatingPointArgument(CI)) {
+    FunctionCallee IPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_iprintf, FT,
+                                                  Callee->getAttributes());
     CallInst *New = cast<CallInst>(CI->clone());
     New->setCalledFunction(IPrintFFn);
     B.Insert(New);
@@ -2552,11 +2579,10 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
 
   // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point
   // arguments.
-  if (TLI->has(LibFunc_small_printf) && !callHasFP128Argument(CI)) {
-    Module *M = B.GetInsertBlock()->getParent()->getParent();
-    auto SmallPrintFFn =
-        M->getOrInsertFunction(TLI->getName(LibFunc_small_printf),
-                               FT, Callee->getAttributes());
+  if (isLibFuncEmittable(M, TLI, LibFunc_small_printf) &&
+      !callHasFP128Argument(CI)) {
+    auto SmallPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_printf, FT,
+                                            Callee->getAttributes());
     CallInst *New = cast<CallInst>(CI->clone());
     New->setCalledFunction(SmallPrintFFn);
     B.Insert(New);
@@ -2655,6 +2681,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
 }
 
 Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
+  Module *M = CI->getModule();
   Function *Callee = CI->getCalledFunction();
   FunctionType *FT = Callee->getFunctionType();
   if (Value *V = optimizeSPrintFString(CI, B)) {
@@ -2663,10 +2690,10 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
 
   // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
   // point arguments.
-  if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) {
-    Module *M = B.GetInsertBlock()->getParent()->getParent();
-    FunctionCallee SIPrintFFn =
-        M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+  if (isLibFuncEmittable(M, TLI, LibFunc_siprintf) &&
+      !callHasFloatingPointArgument(CI)) {
+    FunctionCallee SIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_siprintf,
+                                                   FT, Callee->getAttributes());
     CallInst *New = cast<CallInst>(CI->clone());
     New->setCalledFunction(SIPrintFFn);
     B.Insert(New);
@@ -2675,11 +2702,10 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
 
   // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
   // floating point arguments.
-  if (TLI->has(LibFunc_small_sprintf) && !callHasFP128Argument(CI)) {
-    Module *M = B.GetInsertBlock()->getParent()->getParent();
-    auto SmallSPrintFFn =
-        M->getOrInsertFunction(TLI->getName(LibFunc_small_sprintf),
-                               FT, Callee->getAttributes());
+  if (isLibFuncEmittable(M, TLI, LibFunc_small_sprintf) &&
+      !callHasFP128Argument(CI)) {
+    auto SmallSPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_sprintf, FT,
+                                             Callee->getAttributes());
     CallInst *New = cast<CallInst>(CI->clone());
     New->setCalledFunction(SmallSPrintFFn);
     B.Insert(New);
@@ -2835,6 +2861,7 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
 }
 
 Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) {
+  Module *M = CI->getModule();
   Function *Callee = CI->getCalledFunction();
   FunctionType *FT = Callee->getFunctionType();
   if (Value *V = optimizeFPrintFString(CI, B)) {
@@ -2843,10 +2870,10 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) {
 
   // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
   // floating point arguments.
-  if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) {
-    Module *M = B.GetInsertBlock()->getParent()->getParent();
-    FunctionCallee FIPrintFFn =
-        M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+  if (isLibFuncEmittable(M, TLI, LibFunc_fiprintf) &&
+      !callHasFloatingPointArgument(CI)) {
+    FunctionCallee FIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_fiprintf,
+                                                   FT, Callee->getAttributes());
     CallInst *New = cast<CallInst>(CI->clone());
     New->setCalledFunction(FIPrintFFn);
     B.Insert(New);
@@ -2855,11 +2882,11 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) {
 
   // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
   // 128-bit floating point arguments.
-  if (TLI->has(LibFunc_small_fprintf) && !callHasFP128Argument(CI)) {
-    Module *M = B.GetInsertBlock()->getParent()->getParent();
+  if (isLibFuncEmittable(M, TLI, LibFunc_small_fprintf) &&
+      !callHasFP128Argument(CI)) {
     auto SmallFPrintFFn =
-        M->getOrInsertFunction(TLI->getName(LibFunc_small_fprintf),
-                               FT, Callee->getAttributes());
+        getOrInsertLibFunc(M, *TLI, LibFunc_small_fprintf, FT,
+                           Callee->getAttributes());
     CallInst *New = cast<CallInst>(CI->clone());
     New->setCalledFunction(SmallFPrintFFn);
     B.Insert(New);
@@ -2944,21 +2971,19 @@ Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) {
                                         CI->getArgOperand(2)));
 }
 
-bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
-  LibFunc Func;
+bool LibCallSimplifier::hasFloatVersion(const Module *M, StringRef FuncName) {
   SmallString<20> FloatFuncName = FuncName;
   FloatFuncName += 'f';
-  if (TLI->getLibFunc(FloatFuncName, Func))
-    return TLI->has(Func);
-  return false;
+  return isLibFuncEmittable(M, TLI, FloatFuncName);
 }
 
 Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
                                                       IRBuilderBase &Builder) {
+  Module *M = CI->getModule();
   LibFunc Func;
   Function *Callee = CI->getCalledFunction();
   // Check for string/memory library functions.
-  if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
+  if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) {
     // Make sure we never change the calling convention.
     assert(
         (ignoreCallingConv(Func) ||
@@ -3039,6 +3064,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
 Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
                                                        LibFunc Func,
                                                        IRBuilderBase &Builder) {
+  const Module *M = CI->getModule();
+
   // Don't optimize calls that require strict floating point semantics.
   if (CI->isStrictFP())
     return nullptr;
@@ -3117,12 +3144,12 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
   case LibFunc_sin:
   case LibFunc_sinh:
   case LibFunc_tanh:
-    if (UnsafeFPShrink && hasFloatVersion(CI->getCalledFunction()->getName()))
-      return optimizeUnaryDoubleFP(CI, Builder, true);
+    if (UnsafeFPShrink && hasFloatVersion(M, CI->getCalledFunction()->getName()))
+      return optimizeUnaryDoubleFP(CI, Builder, TLI, true);
     return nullptr;
   case LibFunc_copysign:
-    if (hasFloatVersion(CI->getCalledFunction()->getName()))
-      return optimizeBinaryDoubleFP(CI, Builder);
+    if (hasFloatVersion(M, CI->getCalledFunction()->getName()))
+      return optimizeBinaryDoubleFP(CI, Builder, TLI);
     return nullptr;
   case LibFunc_fminf:
   case LibFunc_fmin:
@@ -3141,6 +3168,7 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
 }
 
 Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
+  Module *M = CI->getModule();
   assert(!CI->isMustTailCall() && "These transforms aren't musttail safe.");
 
   // TODO: Split out the code below that operates on FP calls so that
@@ -3219,7 +3247,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
   }
 
   // Then check for known library functions.
-  if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
+  if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) {
     // We never change the calling convention.
     if (!ignoreCallingConv(Func) && !IsCallingConvC)
       return nullptr;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 5ecee44..d3a944c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -441,6 +441,26 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
   return false;
 }
 
+/// Returns true if A and B have same pointer operands or same SCEVs addresses
+static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
+                               StoreInst *B) {
+  // Compare store
+  if (A == B)
+    return true;
+
+  // Otherwise Compare pointers
+  Value *APtr = A->getPointerOperand();
+  Value *BPtr = B->getPointerOperand();
+  if (APtr == BPtr)
+    return true;
+
+  // Otherwise compare address SCEVs
+  if (SE->getSCEV(APtr) == SE->getSCEV(BPtr))
+    return true;
+
+  return false;
+}
+
 int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                 Value *Ptr) const {
   const ValueToValueMap &Strides =
@@ -678,7 +698,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
 
         RecurrenceDescriptor RedDes;
         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
-                                                 DT)) {
+                                                 DT, PSE.getSE())) {
           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
           AllowedExit.insert(RedDes.getLoopExitInstr());
           Reductions[Phi] = RedDes;
@@ -772,7 +792,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         auto *SE = PSE.getSE();
         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
         for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
-          if (hasVectorIntrinsicScalarOpd(IntrinID, i)) {
+          if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
               reportVectorizationFailure("Found unvectorizable intrinsic",
                   "intrinsic instruction cannot be vectorized",
@@ -913,11 +933,66 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
   if (!LAI->canVectorizeMemory())
     return false;
 
-  if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
-    reportVectorizationFailure("Stores to a uniform address",
-        "write to a loop invariant address could not be vectorized",
-        "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
-    return false;
+  // We can vectorize stores to invariant address when final reduction value is
+  // guaranteed to be stored at the end of the loop. Also, if decision to
+  // vectorize loop is made, runtime checks are added so as to make sure that
+  // invariant address won't alias with any other objects.
+  if (!LAI->getStoresToInvariantAddresses().empty()) {
+    // For each invariant address, check its last stored value is unconditional.
+    for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
+      if (isInvariantStoreOfReduction(SI) &&
+          blockNeedsPredication(SI->getParent())) {
+        reportVectorizationFailure(
+            "We don't allow storing to uniform addresses",
+            "write of conditional recurring variant value to a loop "
+            "invariant address could not be vectorized",
+            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+        return false;
+      }
+    }
+
+    if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
+      // For each invariant address, check its last stored value is the result
+      // of one of our reductions.
+      //
+      // We do not check if dependence with loads exists because they are
+      // currently rejected earlier in LoopAccessInfo::analyzeLoop. In case this
+      // behaviour changes we have to modify this code.
+      ScalarEvolution *SE = PSE.getSE();
+      SmallVector<StoreInst *, 4> UnhandledStores;
+      for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
+        if (isInvariantStoreOfReduction(SI)) {
+          // Earlier stores to this address are effectively deadcode.
+          // With opaque pointers it is possible for one pointer to be used with
+          // different sizes of stored values:
+          //    store i32 0, ptr %x
+          //    store i8 0, ptr %x
+          // The latest store doesn't complitely overwrite the first one in the
+          // example. That is why we have to make sure that types of stored
+          // values are same.
+          // TODO: Check that bitwidth of unhandled store is smaller then the
+          // one that overwrites it and add a test.
+          erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
+            return storeToSameAddress(SE, SI, I) &&
+                   I->getValueOperand()->getType() ==
+                       SI->getValueOperand()->getType();
+          });
+          continue;
+        }
+        UnhandledStores.push_back(SI);
+      }
+
+      bool IsOK = UnhandledStores.empty();
+      // TODO: we should also validate against InvariantMemSets.
+      if (!IsOK) {
+        reportVectorizationFailure(
+            "We don't allow storing to uniform addresses",
+            "write to a loop invariant address could not "
+            "be vectorized",
+            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+        return false;
+      }
+    }
   }
 
   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
@@ -944,13 +1019,34 @@ bool LoopVectorizationLegality::canVectorizeFPMath(
 
   // We can now only vectorize if all reductions with Exact FP math also
   // have the isOrdered flag set, which indicates that we can move the
-  // reduction operations in-loop.
+  // reduction operations in-loop, and do not have intermediate store.
   return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
     const RecurrenceDescriptor &RdxDesc = Reduction.second;
-    return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
+    return !RdxDesc.hasExactFPMath() ||
+           (RdxDesc.isOrdered() && !RdxDesc.IntermediateStore);
   }));
 }
 
+bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
+  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
+    const RecurrenceDescriptor &RdxDesc = Reduction.second;
+    return RdxDesc.IntermediateStore == SI;
+  });
+}
+
+bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
+  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
+    const RecurrenceDescriptor &RdxDesc = Reduction.second;
+    if (!RdxDesc.IntermediateStore)
+      return false;
+
+    ScalarEvolution *SE = PSE.getSE();
+    Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
+    return V == InvariantAddress ||
+           SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
+  });
+}
+
 bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
   Value *In0 = const_cast<Value *>(V);
   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3bedf4b..d59abd2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3998,6 +3998,17 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
   // Set the resume value for this reduction
   ReductionResumeValues.insert({&RdxDesc, BCBlockPhi});
 
+  // If there were stores of the reduction value to a uniform memory address
+  // inside the loop, create the final store here.
+  if (StoreInst *SI = RdxDesc.IntermediateStore) {
+    StoreInst *NewSI =
+        Builder.CreateStore(ReducedPartRdx, SI->getPointerOperand());
+    propagateMetadata(NewSI, SI);
+
+    // If the reduction value is used in other places,
+    // then let the code below create PHI's for that.
+  }
+
   // Now, we need to fix the users of the reduction variable
   // inside and outside of the scalar remainder loop.
 
@@ -4244,13 +4255,13 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
       // Some intrinsics have a scalar argument - don't replace it with a
       // vector.
       Value *Arg;
-      if (!UseVectorIntrinsic || !hasVectorIntrinsicScalarOpd(ID, I.index()))
+      if (!UseVectorIntrinsic ||
+          !isVectorIntrinsicWithScalarOpAtArg(ID, I.index()))
         Arg = State.get(I.value(), Part);
-      else {
+      else
         Arg = State.get(I.value(), VPIteration(0, 0));
-        if (hasVectorIntrinsicOverloadedScalarOpd(ID, I.index()))
-          TysForDecl.push_back(Arg->getType());
-      }
+      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I.index()))
+        TysForDecl.push_back(Arg->getType());
       Args.push_back(Arg);
     }
 
@@ -7340,6 +7351,16 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
   // Ignore ephemeral values.
   CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
 
+  // Find all stores to invariant variables. Since they are going to sink
+  // outside the loop we do not need calculate cost for them.
+  for (BasicBlock *BB : TheLoop->blocks())
+    for (Instruction &I : *BB) {
+      StoreInst *SI;
+      if ((SI = dyn_cast<StoreInst>(&I)) &&
+          Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
+        ValuesToIgnore.insert(&I);
+    }
+
   // Ignore type-promoting instructions we identified during reduction
   // detection.
   for (auto &Reduction : Legal->getReductionVars()) {
@@ -8329,6 +8350,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
     return nullptr;
 
   auto willWiden = [&](ElementCount VF) -> bool {
+    if (VF.isScalar())
+       return false;
     Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
     // The following case may be scalarized depending on the VF.
     // The flag shows whether we use Intrinsic or a usual Call for vectorized
@@ -8843,6 +8866,13 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
         continue;
       }
 
+      // Invariant stores inside loop will be deleted and a single store
+      // with the final reduction value will be added to the exit block
+      StoreInst *SI;
+      if ((SI = dyn_cast<StoreInst>(&I)) &&
+          Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
+        continue;
+
       // Otherwise, if all widening options failed, Instruction is to be
       // replicated. This may create a successor for VPBB.
       VPBasicBlock *NextVPBB =
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a6b1bb8..4583308 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -641,7 +641,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
     CallInst *CI = cast<CallInst>(UserInst);
     Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
     for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
-      if (hasVectorIntrinsicScalarOpd(ID, i))
+      if (isVectorIntrinsicWithScalarOpAtArg(ID, i))
         return (CI->getArgOperand(i) == Scalar);
     }
     LLVM_FALLTHROUGH;
@@ -2042,6 +2042,36 @@ public:
     DeletedInstructions.insert(I);
   }
 
+  /// Checks if the instruction was already analyzed for being possible
+  /// reduction root.
+  bool isAnalizedReductionRoot(Instruction *I) const {
+    return AnalizedReductionsRoots.count(I);
+  }
+  /// Register given instruction as already analyzed for being possible
+  /// reduction root.
+  void analyzedReductionRoot(Instruction *I) {
+    AnalizedReductionsRoots.insert(I);
+  }
+  /// Checks if the provided list of reduced values was checked already for
+  /// vectorization.
+  bool areAnalyzedReductionVals(ArrayRef<Value *> VL) {
+    return AnalyzedReductionVals.contains(hash_value(VL));
+  }
+  /// Adds the list of reduced values to list of already checked values for the
+  /// vectorization.
+  void analyzedReductionVals(ArrayRef<Value *> VL) {
+    AnalyzedReductionVals.insert(hash_value(VL));
+  }
+  /// Clear the list of the analyzed reduction root instructions.
+  void clearReductionData() {
+    AnalizedReductionsRoots.clear();
+    AnalyzedReductionVals.clear();
+  }
+  /// Checks if the given value is gathered in one of the nodes.
+  bool isGathered(Value *V) const {
+    return MustGather.contains(V);
+  }
+
   ~BoUpSLP();
 
 private:
@@ -2603,6 +2633,12 @@ private:
   /// previously deleted instruction.
   DenseSet<Instruction *> DeletedInstructions;
 
+  /// Set of the instruction, being analyzed already for reductions.
+  SmallPtrSet<Instruction *, 16> AnalizedReductionsRoots;
+
+  /// Set of hashes for the list of reduction values already being analyzed.
+  DenseSet<size_t> AnalyzedReductionVals;
+
   /// A list of values that need to extracted out of the tree.
   /// This list holds pairs of (Internal Scalar : External User). External User
   /// can be nullptr, it means that this Internal Scalar will be used later,
@@ -4041,6 +4077,83 @@ static bool needToScheduleSingleInstruction(ArrayRef<Value *> VL) {
 }
 #endif
 
+/// Generates key/subkey pair for the given value to provide effective sorting
+/// of the values and better detection of the vectorizable values sequences. The
+/// keys/subkeys can be used for better sorting of the values themselves (keys)
+/// and in values subgroups (subkeys).
+static std::pair<size_t, size_t> generateKeySubkey(
+    Value *V, const TargetLibraryInfo *TLI,
+    function_ref<hash_code(size_t, LoadInst *)> LoadsSubkeyGenerator,
+    bool AllowAlternate) {
+  hash_code Key = hash_value(V->getValueID() + 2);
+  hash_code SubKey = hash_value(0);
+  // Sort the loads by the distance between the pointers.
+  if (auto *LI = dyn_cast<LoadInst>(V)) {
+    Key = hash_combine(hash_value(Instruction::Load), Key);
+    if (LI->isSimple())
+      SubKey = hash_value(LoadsSubkeyGenerator(Key, LI));
+    else
+      SubKey = hash_value(LI);
+  } else if (isVectorLikeInstWithConstOps(V)) {
+    // Sort extracts by the vector operands.
+    if (isa<ExtractElementInst, UndefValue>(V))
+      Key = hash_value(Value::UndefValueVal + 1);
+    if (auto *EI = dyn_cast<ExtractElementInst>(V)) {
+      if (!isUndefVector(EI->getVectorOperand()) &&
+          !isa<UndefValue>(EI->getIndexOperand()))
+        SubKey = hash_value(EI->getVectorOperand());
+    }
+  } else if (auto *I = dyn_cast<Instruction>(V)) {
+    // Sort other instructions just by the opcodes except for CMPInst.
+    // For CMP also sort by the predicate kind.
+    if ((isa<BinaryOperator>(I) || isa<CastInst>(I)) &&
+        isValidForAlternation(I->getOpcode())) {
+      if (AllowAlternate)
+        Key = hash_value(isa<BinaryOperator>(I) ? 1 : 0);
+      else
+        Key = hash_combine(hash_value(I->getOpcode()), Key);
+      SubKey = hash_combine(
+          hash_value(I->getOpcode()), hash_value(I->getType()),
+          hash_value(isa<BinaryOperator>(I)
+                         ? I->getType()
+                         : cast<CastInst>(I)->getOperand(0)->getType()));
+    } else if (auto *CI = dyn_cast<CmpInst>(I)) {
+      CmpInst::Predicate Pred = CI->getPredicate();
+      if (CI->isCommutative())
+        Pred = std::min(Pred, CmpInst::getInversePredicate(Pred));
+      CmpInst::Predicate SwapPred = CmpInst::getSwappedPredicate(Pred);
+      SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(Pred),
+                            hash_value(SwapPred),
+                            hash_value(CI->getOperand(0)->getType()));
+    } else if (auto *Call = dyn_cast<CallInst>(I)) {
+      Intrinsic::ID ID = getVectorIntrinsicIDForCall(Call, TLI);
+      if (isTriviallyVectorizable(ID))
+        SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(ID));
+      else if (!VFDatabase(*Call).getMappings(*Call).empty())
+        SubKey = hash_combine(hash_value(I->getOpcode()),
+                              hash_value(Call->getCalledFunction()));
+      else
+        SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(Call));
+      for (const CallBase::BundleOpInfo &Op : Call->bundle_op_infos())
+        SubKey = hash_combine(hash_value(Op.Begin), hash_value(Op.End),
+                              hash_value(Op.Tag), SubKey);
+    } else if (auto *Gep = dyn_cast<GetElementPtrInst>(I)) {
+      if (Gep->getNumOperands() == 2 && isa<ConstantInt>(Gep->getOperand(1)))
+        SubKey = hash_value(Gep->getPointerOperand());
+      else
+        SubKey = hash_value(Gep);
+    } else if (BinaryOperator::isIntDivRem(I->getOpcode()) &&
+               !isa<ConstantInt>(I->getOperand(1))) {
+      // Do not try to vectorize instructions with potentially high cost.
+      SubKey = hash_value(I);
+    } else {
+      SubKey = hash_value(I->getOpcode());
+    }
+    Key = hash_combine(hash_value(I->getParent()), Key);
+  }
+  return std::make_pair(Key, SubKey);
+}
+
 void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                             const EdgeInfo &UserTreeIdx) {
   assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
@@ -4742,7 +4855,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       unsigned NumArgs = CI->arg_size();
       SmallVector<Value*, 4> ScalarArgs(NumArgs, nullptr);
       for (unsigned j = 0; j != NumArgs; ++j)
-        if (hasVectorIntrinsicScalarOpd(ID, j))
+        if (isVectorIntrinsicWithScalarOpAtArg(ID, j))
           ScalarArgs[j] = CI->getArgOperand(j);
       for (Value *V : VL) {
         CallInst *CI2 = dyn_cast<CallInst>(V);
@@ -4761,7 +4874,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         // Some intrinsics have scalar arguments and should be same in order for
         // them to be vectorized.
         for (unsigned j = 0; j != NumArgs; ++j) {
-          if (hasVectorIntrinsicScalarOpd(ID, j)) {
+          if (isVectorIntrinsicWithScalarOpAtArg(ID, j)) {
             Value *A1J = CI2->getArgOperand(j);
             if (ScalarArgs[j] != A1J) {
               BS.cancelScheduling(VL, VL0);
@@ -4794,7 +4907,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
         // For scalar operands no need to to create an entry since no need to
         // vectorize it.
-        if (hasVectorIntrinsicScalarOpd(ID, i))
+        if (isVectorIntrinsicWithScalarOpAtArg(ID, i))
           continue;
         ValueList Operands;
         // Prepare the operand vector.
@@ -6238,10 +6351,10 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
             ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
             // Find the insertvector, vectorized in tree, if any.
             Value *Base = VU;
-            while (isa<InsertElementInst>(Base)) {
+            while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
               // Build the mask for the vectorized insertelement instructions.
-              if (const TreeEntry *E = getTreeEntry(Base)) {
-                VU = cast<InsertElementInst>(Base);
+              if (const TreeEntry *E = getTreeEntry(IEBase)) {
+                VU = IEBase;
                 do {
                   int Idx = E->findLaneForValue(Base);
                   ShuffleMask.back()[Idx] = Idx;
@@ -6257,8 +6370,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
           } else {
             VecId = std::distance(FirstUsers.begin(), It);
           }
-          ShuffleMask[VecId][*InsertIdx] = EU.Lane;
-          DemandedElts[VecId].setBit(*InsertIdx);
+          int InIdx = *InsertIdx;
+          ShuffleMask[VecId][InIdx] = EU.Lane;
+          DemandedElts[VecId].setBit(InIdx);
           continue;
         }
       }
@@ -6459,6 +6573,12 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
     }
   }
 
+  if (UsedTEs.empty()) {
+    assert(all_of(TE->Scalars, UndefValue::classof) &&
+           "Expected vector of undefs only.");
+    return None;
+  }
+
   unsigned VF = 0;
   if (UsedTEs.size() == 1) {
     // Try to find the perfect match in another gather node at first.
@@ -6612,11 +6732,15 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
   // should not be scheduled.
   if (E->State != TreeEntry::NeedToGather &&
       doesNotNeedToSchedule(E->Scalars)) {
-    BasicBlock::iterator InsertPt;
+    Instruction *InsertInst;
     if (all_of(E->Scalars, isUsedOutsideBlock))
-      InsertPt = FindLastInst()->getIterator();
+      InsertInst = FindLastInst();
     else
-      InsertPt = FindFirstInst()->getIterator();
+      InsertInst = FindFirstInst();
+    // If the instruction is PHI, set the insert point after all the PHIs.
+    if (isa<PHINode>(InsertInst))
+      InsertInst = BB->getFirstNonPHI();
+    BasicBlock::iterator InsertPt = InsertInst->getIterator();
     Builder.SetInsertPoint(BB, InsertPt);
     Builder.SetCurrentDebugLocation(Front->getDebugLoc());
     return;
@@ -6658,13 +6782,17 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
   // not ideal. However, this should be exceedingly rare since it requires that
   // we both exit early from buildTree_rec and that the bundle be out-of-order
   // (causing us to iterate all the way to the end of the block).
-  if (!LastInst)
+  if (!LastInst) {
     LastInst = FindLastInst();
+    // If the instruction is PHI, set the insert point after all the PHIs.
+    if (isa<PHINode>(LastInst))
+      LastInst = BB->getFirstNonPHI()->getPrevNode();
+  }
   assert(LastInst && "Failed to find last instruction in bundle");
 
   // Set the insertion point after the last instruction in the bundle. Set the
   // debug location to Front.
-  Builder.SetInsertPoint(BB, ++LastInst->getIterator());
+  Builder.SetInsertPoint(BB, std::next(LastInst->getIterator()));
   Builder.SetCurrentDebugLocation(Front->getDebugLoc());
 }
 
@@ -7358,11 +7486,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         ValueList OpVL;
         // Some intrinsics have scalar arguments. This argument should not be
         // vectorized.
-        if (UseIntrinsic && hasVectorIntrinsicScalarOpd(IID, j)) {
+        if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, j)) {
           CallInst *CEI = cast<CallInst>(VL0);
           ScalarArg = CEI->getArgOperand(j);
           OpVecs.push_back(CEI->getArgOperand(j));
-          if (hasVectorIntrinsicOverloadedScalarOpd(IID, j))
+          if (isVectorIntrinsicWithOverloadTypeAtArg(IID, j))
             TysForDecl.push_back(ScalarArg->getType());
           continue;
         }
@@ -7370,6 +7498,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         Value *OpVec = vectorizeTree(E->getOperand(j));
         LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
         OpVecs.push_back(OpVec);
+        if (isVectorIntrinsicWithOverloadTypeAtArg(IID, j))
+          TysForDecl.push_back(OpVec->getType());
       }
 
       Function *CF;
@@ -8804,6 +8934,8 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
 
   // Scan the blocks in the function in post order.
   for (auto BB : post_order(&F.getEntryBlock())) {
+    // Start new block - clear the list of reduction roots.
+    R.clearReductionData();
     collectSeedInstructions(BB);
 
     // Vectorize trees that end at stores.
@@ -9273,15 +9405,16 @@ class HorizontalReduction {
   using ReductionOpsType = SmallVector<Value *, 16>;
   using ReductionOpsListType = SmallVector<ReductionOpsType, 2>;
   ReductionOpsListType ReductionOps;
-  SmallVector<Value *, 32> ReducedVals;
+  /// List of possibly reduced values.
+  SmallVector<SmallVector<Value *>> ReducedVals;
+  /// Maps reduced value to the corresponding reduction operation.
+  DenseMap<Value *, SmallVector<Instruction *>> ReducedValsToOps;
   // Use map vector to make stable output.
   MapVector<Instruction *, Value *> ExtraArgs;
   WeakTrackingVH ReductionRoot;
   /// The type of reduction operation.
   RecurKind RdxKind;
 
-  const unsigned INVALID_OPERAND_INDEX = std::numeric_limits<unsigned>::max();
-
   static bool isCmpSelMinMax(Instruction *I) {
     return match(I, m_Select(m_Cmp(), m_Value(), m_Value())) &&
            RecurrenceDescriptor::isMinMaxRecurrenceKind(getRdxKind(I));
@@ -9325,26 +9458,6 @@ class HorizontalReduction {
     return I->getOperand(Index);
   }
 
-  /// Checks if the ParentStackElem.first should be marked as a reduction
-  /// operation with an extra argument or as extra argument itself.
-  void markExtraArg(std::pair<Instruction *, unsigned> &ParentStackElem,
-                    Value *ExtraArg) {
-    if (ExtraArgs.count(ParentStackElem.first)) {
-      ExtraArgs[ParentStackElem.first] = nullptr;
-      // We ran into something like:
-      // ParentStackElem.first = ExtraArgs[ParentStackElem.first] + ExtraArg.
-      // The whole ParentStackElem.first should be considered as an extra value
-      // in this case.
-      // Do not perform analysis of remaining operands of ParentStackElem.first
-      // instruction, this whole instruction is an extra argument.
-      ParentStackElem.second = INVALID_OPERAND_INDEX;
-    } else {
-      // We ran into something like:
-      // ParentStackElem.first += ... + ExtraArg + ...
-      ExtraArgs[ParentStackElem.first] = ExtraArg;
-    }
-  }
-
   /// Creates reduction operation with the current opcode.
   static Value *createOp(IRBuilder<> &Builder, RecurKind Kind, Value *LHS,
                          Value *RHS, const Twine &Name, bool UseSelect) {
@@ -9429,7 +9542,7 @@ class HorizontalReduction {
   /// Creates reduction operation with the current opcode with the IR flags
   /// from \p I.
   static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
-                         Value *RHS, const Twine &Name, Instruction *I) {
+                         Value *RHS, const Twine &Name, Value *I) {
     auto *SelI = dyn_cast<SelectInst>(I);
     Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, SelI != nullptr);
     if (SelI && RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
@@ -9440,8 +9553,10 @@ class HorizontalReduction {
     return Op;
   }
 
-  static RecurKind getRdxKind(Instruction *I) {
-    assert(I && "Expected instruction for reduction matching");
+  static RecurKind getRdxKind(Value *V) {
+    auto *I = dyn_cast<Instruction>(V);
+    if (!I)
+      return RecurKind::None;
     if (match(I, m_Add(m_Value(), m_Value())))
       return RecurKind::Add;
     if (match(I, m_Mul(m_Value(), m_Value())))
@@ -9603,7 +9718,9 @@ public:
   HorizontalReduction() = default;
 
   /// Try to find a reduction tree.
-  bool matchAssociativeReduction(PHINode *Phi, Instruction *Inst) {
+  bool matchAssociativeReduction(PHINode *Phi, Instruction *Inst,
+                                 ScalarEvolution &SE, const DataLayout &DL,
+                                 const TargetLibraryInfo &TLI) {
     assert((!Phi || is_contained(Phi->operands(), Inst)) &&
            "Phi needs to use the binary operator");
     assert((isa<BinaryOperator>(Inst) || isa<SelectInst>(Inst) ||
@@ -9647,124 +9764,168 @@ public:
 
     ReductionRoot = Inst;
 
-    // The opcode for leaf values that we perform a reduction on.
-    // For example: load(x) + load(y) + load(z) + fptoui(w)
-    // The leaf opcode for 'w' does not match, so we don't include it as a
-    // potential candidate for the reduction.
-    unsigned LeafOpcode = 0;
-
-    // Post-order traverse the reduction tree starting at Inst. We only handle
-    // true trees containing binary operators or selects.
-    SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;
-    Stack.push_back(std::make_pair(Inst, getFirstOperandIndex(Inst)));
-    initReductionOps(Inst);
-    while (!Stack.empty()) {
-      Instruction *TreeN = Stack.back().first;
-      unsigned EdgeToVisit = Stack.back().second++;
-      const RecurKind TreeRdxKind = getRdxKind(TreeN);
-      bool IsReducedValue = TreeRdxKind != RdxKind;
-
-      // Postorder visit.
-      if (IsReducedValue || EdgeToVisit >= getNumberOfOperands(TreeN)) {
-        if (IsReducedValue)
-          ReducedVals.push_back(TreeN);
-        else {
-          auto ExtraArgsIter = ExtraArgs.find(TreeN);
-          if (ExtraArgsIter != ExtraArgs.end() && !ExtraArgsIter->second) {
-            // Check if TreeN is an extra argument of its parent operation.
-            if (Stack.size() <= 1) {
-              // TreeN can't be an extra argument as it is a root reduction
-              // operation.
-              return false;
-            }
-            // Yes, TreeN is an extra argument, do not add it to a list of
-            // reduction operations.
-            // Stack[Stack.size() - 2] always points to the parent operation.
-            markExtraArg(Stack[Stack.size() - 2], TreeN);
-            ExtraArgs.erase(TreeN);
-          } else
-            addReductionOps(TreeN);
+    // Iterate through all the operands of the possible reduction tree and
+    // gather all the reduced values, sorting them by their value id.
+    BasicBlock *BB = Inst->getParent();
+    bool IsCmpSelMinMax = isCmpSelMinMax(Inst);
+    SmallVector<Instruction *> Worklist(1, Inst);
+    // Checks if the operands of the \p TreeN instruction are also reduction
+    // operations or should be treated as reduced values or an extra argument,
+    // which is not part of the reduction.
+    auto &&CheckOperands = [this, IsCmpSelMinMax,
+                            BB](Instruction *TreeN,
+                                SmallVectorImpl<Value *> &ExtraArgs,
+                                SmallVectorImpl<Value *> &PossibleReducedVals,
+                                SmallVectorImpl<Instruction *> &ReductionOps) {
+      for (int I = getFirstOperandIndex(TreeN),
+               End = getNumberOfOperands(TreeN);
+           I < End; ++I) {
+        Value *EdgeVal = getRdxOperand(TreeN, I);
+        ReducedValsToOps[EdgeVal].push_back(TreeN);
+        auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
+        // Edge has wrong parent - mark as an extra argument.
+        if (EdgeInst && !isVectorLikeInstWithConstOps(EdgeInst) &&
+            !hasSameParent(EdgeInst, BB)) {
+          ExtraArgs.push_back(EdgeVal);
+          continue;
         }
-        // Retract.
-        Stack.pop_back();
-        continue;
-      }
-
-      // Visit operands.
-      Value *EdgeVal = getRdxOperand(TreeN, EdgeToVisit);
-      auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
-      if (!EdgeInst) {
-        // Edge value is not a reduction instruction or a leaf instruction.
-        // (It may be a constant, function argument, or something else.)
-        markExtraArg(Stack.back(), EdgeVal);
-        continue;
+        // If the edge is not an instruction, or it is different from the main
+        // reduction opcode or has too many uses - possible reduced value.
+        if (!EdgeInst || getRdxKind(EdgeInst) != RdxKind ||
+            !hasRequiredNumberOfUses(IsCmpSelMinMax, EdgeInst) ||
+            !isVectorizable(getRdxKind(EdgeInst), EdgeInst)) {
+          PossibleReducedVals.push_back(EdgeVal);
+          continue;
+        }
+        ReductionOps.push_back(EdgeInst);
       }
-      RecurKind EdgeRdxKind = getRdxKind(EdgeInst);
-      // Continue analysis if the next operand is a reduction operation or
-      // (possibly) a leaf value. If the leaf value opcode is not set,
-      // the first met operation != reduction operation is considered as the
-      // leaf opcode.
-      // Only handle trees in the current basic block.
-      // Each tree node needs to have minimal number of users except for the
-      // ultimate reduction.
-      const bool IsRdxInst = EdgeRdxKind == RdxKind;
-      if (EdgeInst != Phi && EdgeInst != Inst &&
-          hasSameParent(EdgeInst, Inst->getParent()) &&
-          hasRequiredNumberOfUses(isCmpSelMinMax(Inst), EdgeInst) &&
-          (!LeafOpcode || LeafOpcode == EdgeInst->getOpcode() || IsRdxInst)) {
-        if (IsRdxInst) {
-          // We need to be able to reassociate the reduction operations.
-          if (!isVectorizable(EdgeRdxKind, EdgeInst)) {
-            // I is an extra argument for TreeN (its parent operation).
-            markExtraArg(Stack.back(), EdgeInst);
-            continue;
-          }
-        } else if (!LeafOpcode) {
-          LeafOpcode = EdgeInst->getOpcode();
+    };
+    // Try to regroup reduced values so that it gets more profitable to try to
+    // reduce them. Values are grouped by their value ids, instructions - by
+    // instruction op id and/or alternate op id, plus do extra analysis for
+    // loads (grouping them by the distabce between pointers) and cmp
+    // instructions (grouping them by the predicate).
+    MapVector<size_t, MapVector<size_t, MapVector<Value *, unsigned>>>
+        PossibleReducedVals;
+    initReductionOps(Inst);
+    while (!Worklist.empty()) {
+      Instruction *TreeN = Worklist.pop_back_val();
+      SmallVector<Value *> Args;
+      SmallVector<Value *> PossibleRedVals;
+      SmallVector<Instruction *> PossibleReductionOps;
+      CheckOperands(TreeN, Args, PossibleRedVals, PossibleReductionOps);
+      // If too many extra args - mark the instruction itself as a reduction
+      // value, not a reduction operation.
+      if (Args.size() < 2) {
+        addReductionOps(TreeN);
+        // Add extra args.
+        if (!Args.empty()) {
+          assert(Args.size() == 1 && "Expected only single argument.");
+          ExtraArgs[TreeN] = Args.front();
         }
-        Stack.push_back(
-            std::make_pair(EdgeInst, getFirstOperandIndex(EdgeInst)));
-        continue;
+        // Add reduction values. The values are sorted for better vectorization
+        // results.
+        for (Value *V : PossibleRedVals) {
+          size_t Key, Idx;
+          std::tie(Key, Idx) = generateKeySubkey(
+              V, &TLI,
+              [&PossibleReducedVals, &DL, &SE](size_t Key, LoadInst *LI) {
+                for (const auto &LoadData : PossibleReducedVals[Key]) {
+                  auto *RLI = cast<LoadInst>(LoadData.second.front().first);
+                  if (getPointersDiff(RLI->getType(), RLI->getPointerOperand(),
+                                      LI->getType(), LI->getPointerOperand(),
+                                      DL, SE, /*StrictCheck=*/true))
+                    return hash_value(RLI->getPointerOperand());
+                }
+                return hash_value(LI->getPointerOperand());
+              },
+              /*AllowAlternate=*/false);
+          ++PossibleReducedVals[Key][Idx]
+                .insert(std::make_pair(V, 0))
+                .first->second;
+        }
+        Worklist.append(PossibleReductionOps.rbegin(),
+                        PossibleReductionOps.rend());
+      } else {
+        size_t Key, Idx;
+        std::tie(Key, Idx) = generateKeySubkey(
+            TreeN, &TLI,
+            [&PossibleReducedVals, &DL, &SE](size_t Key, LoadInst *LI) {
+              for (const auto &LoadData : PossibleReducedVals[Key]) {
+                auto *RLI = cast<LoadInst>(LoadData.second.front().first);
+                if (getPointersDiff(RLI->getType(), RLI->getPointerOperand(),
+                                    LI->getType(), LI->getPointerOperand(), DL,
+                                    SE, /*StrictCheck=*/true))
+                  return hash_value(RLI->getPointerOperand());
+              }
+              return hash_value(LI->getPointerOperand());
+            },
+            /*AllowAlternate=*/false);
+        ++PossibleReducedVals[Key][Idx]
+              .insert(std::make_pair(TreeN, 0))
+              .first->second;
       }
-      // I is an extra argument for TreeN (its parent operation).
-      markExtraArg(Stack.back(), EdgeInst);
     }
+    auto PossibleReducedValsVect = PossibleReducedVals.takeVector();
+    // Sort values by the total number of values kinds to start the reduction
+    // from the longest possible reduced values sequences.
+    for (auto &PossibleReducedVals : PossibleReducedValsVect) {
+      auto PossibleRedVals = PossibleReducedVals.second.takeVector();
+      SmallVector<SmallVector<Value *>> PossibleRedValsVect;
+      for (auto It = PossibleRedVals.begin(), E = PossibleRedVals.end();
+           It != E; ++It) {
+        PossibleRedValsVect.emplace_back();
+        auto RedValsVect = It->second.takeVector();
+        stable_sort(RedValsVect, [](const auto &P1, const auto &P2) {
+          return P1.second < P2.second;
+        });
+        for (const std::pair<Value *, unsigned> &Data : RedValsVect)
+          PossibleRedValsVect.back().append(Data.second, Data.first);
+      }
+      stable_sort(PossibleRedValsVect, [](const auto &P1, const auto &P2) {
+        return P1.size() > P2.size();
+      });
+      ReducedVals.emplace_back();
+      for (ArrayRef<Value *> Data : PossibleRedValsVect)
+        ReducedVals.back().append(Data.rbegin(), Data.rend());
+    }
+    // Sort the reduced values by number of same/alternate opcode and/or pointer
+    // operand.
+    stable_sort(ReducedVals, [](ArrayRef<Value *> P1, ArrayRef<Value *> P2) {
+      return P1.size() > P2.size();
+    });
     return true;
   }
 
   /// Attempt to vectorize the tree found by matchAssociativeReduction.
   Value *tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
+    constexpr int ReductionLimit = 4;
     // If there are a sufficient number of reduction values, reduce
     // to a nearby power-of-2. We can safely generate oversized
     // vectors and rely on the backend to split them to legal sizes.
-    unsigned NumReducedVals = ReducedVals.size();
-    if (NumReducedVals < 4)
+    unsigned NumReducedVals = std::accumulate(
+        ReducedVals.begin(), ReducedVals.end(), 0,
+        [](int Num, ArrayRef<Value *> Vals) { return Num + Vals.size(); });
+    if (NumReducedVals < ReductionLimit)
       return nullptr;
 
-    // Intersect the fast-math-flags from all reduction operations.
-    FastMathFlags RdxFMF;
-    RdxFMF.set();
-    for (ReductionOpsType &RdxOp : ReductionOps) {
-      for (Value *RdxVal : RdxOp) {
-        if (auto *FPMO = dyn_cast<FPMathOperator>(RdxVal))
-          RdxFMF &= FPMO->getFastMathFlags();
-      }
-    }
-
     IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
-    Builder.setFastMathFlags(RdxFMF);
 
+    // Track the reduced values in case if they are replaced by extractelement
+    // because of the vectorization.
+    DenseMap<Value *, WeakTrackingVH> TrackedVals;
     BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
     // The same extra argument may be used several times, so log each attempt
     // to use it.
     for (const std::pair<Instruction *, Value *> &Pair : ExtraArgs) {
       assert(Pair.first && "DebugLoc must be set.");
       ExternallyUsedValues[Pair.second].push_back(Pair.first);
+      TrackedVals.try_emplace(Pair.second, Pair.second);
     }
 
     // The compare instruction of a min/max is the insertion point for new
     // instructions and may be replaced with a new compare instruction.
-    auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
+    auto &&GetCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
       assert(isa<SelectInst>(RdxRootInst) &&
              "Expected min/max reduction to have select root instruction");
       Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition();
@@ -9776,141 +9937,289 @@ public:
     // The reduction root is used as the insertion point for new instructions,
     // so set it as externally used to prevent it from being deleted.
     ExternallyUsedValues[ReductionRoot];
-    SmallVector<Value *, 16> IgnoreList;
-    for (ReductionOpsType &RdxOp : ReductionOps)
-      IgnoreList.append(RdxOp.begin(), RdxOp.end());
-
-    unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
-    if (NumReducedVals > ReduxWidth) {
-      // In the loop below, we are building a tree based on a window of
-      // 'ReduxWidth' values.
-      // If the operands of those values have common traits (compare predicate,
-      // constant operand, etc), then we want to group those together to
-      // minimize the cost of the reduction.
-
-      // TODO: This should be extended to count common operands for
-      //       compares and binops.
-
-      // Step 1: Count the number of times each compare predicate occurs.
-      SmallDenseMap<unsigned, unsigned> PredCountMap;
-      for (Value *RdxVal : ReducedVals) {
-        CmpInst::Predicate Pred;
-        if (match(RdxVal, m_Cmp(Pred, m_Value(), m_Value())))
-          ++PredCountMap[Pred];
-      }
-      // Step 2: Sort the values so the most common predicates come first.
-      stable_sort(ReducedVals, [&PredCountMap](Value *A, Value *B) {
-        CmpInst::Predicate PredA, PredB;
-        if (match(A, m_Cmp(PredA, m_Value(), m_Value())) &&
-            match(B, m_Cmp(PredB, m_Value(), m_Value()))) {
-          return PredCountMap[PredA] > PredCountMap[PredB];
-        }
-        return false;
-      });
-    }
+    SmallVector<Value *> IgnoreList;
+    for (ReductionOpsType &RdxOps : ReductionOps)
+      for (Value *RdxOp : RdxOps) {
+        if (!RdxOp)
+          continue;
+        IgnoreList.push_back(RdxOp);
+      }
+    bool IsCmpSelMinMax = isCmpSelMinMax(cast<Instruction>(ReductionRoot));
+
+    // Need to track reduced vals, they may be changed during vectorization of
+    // subvectors.
+    for (ArrayRef<Value *> Candidates : ReducedVals)
+      for (Value *V : Candidates)
+        TrackedVals.try_emplace(V, V);
 
+    DenseMap<Value *, unsigned> VectorizedVals;
     Value *VectorizedTree = nullptr;
-    unsigned i = 0;
-    while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
-      ArrayRef<Value *> VL(&ReducedVals[i], ReduxWidth);
-      V.buildTree(VL, IgnoreList);
-      if (V.isTreeTinyAndNotFullyVectorizable(/*ForReduction=*/true))
-        break;
-      if (V.isLoadCombineReductionCandidate(RdxKind))
-        break;
-      V.reorderTopToBottom();
-      V.reorderBottomToTop(/*IgnoreReorder=*/true);
-      V.buildExternalUses(ExternallyUsedValues);
-
-      // For a poison-safe boolean logic reduction, do not replace select
-      // instructions with logic ops. All reduced values will be frozen (see
-      // below) to prevent leaking poison.
-      if (isa<SelectInst>(ReductionRoot) &&
-          isBoolLogicOp(cast<Instruction>(ReductionRoot)) &&
-          NumReducedVals != ReduxWidth)
-        break;
+    bool CheckForReusedReductionOps = false;
+    // Try to vectorize elements based on their type.
+    for (unsigned I = 0, E = ReducedVals.size(); I < E; ++I) {
+      ArrayRef<Value *> OrigReducedVals = ReducedVals[I];
+      InstructionsState S = getSameOpcode(OrigReducedVals);
+      SmallVector<Value *> Candidates;
+      DenseMap<Value *, Value *> TrackedToOrig;
+      for (unsigned Cnt = 0, Sz = OrigReducedVals.size(); Cnt < Sz; ++Cnt) {
+        Value *RdxVal = TrackedVals.find(OrigReducedVals[Cnt])->second;
+        // Check if the reduction value was not overriden by the extractelement
+        // instruction because of the vectorization and exclude it, if it is not
+        // compatible with other values.
+        if (auto *Inst = dyn_cast<Instruction>(RdxVal))
+          if (isVectorLikeInstWithConstOps(Inst) &&
+              (!S.getOpcode() || !S.isOpcodeOrAlt(Inst)))
+            continue;
+        Candidates.push_back(RdxVal);
+        TrackedToOrig.try_emplace(RdxVal, OrigReducedVals[Cnt]);
+      }
+      bool ShuffledExtracts = false;
+      // Try to handle shuffled extractelements.
+      if (S.getOpcode() == Instruction::ExtractElement && !S.isAltShuffle() &&
+          I + 1 < E) {
+        InstructionsState NextS = getSameOpcode(ReducedVals[I + 1]);
+        if (NextS.getOpcode() == Instruction::ExtractElement &&
+            !NextS.isAltShuffle()) {
+          SmallVector<Value *> CommonCandidates(Candidates);
+          for (Value *RV : ReducedVals[I + 1]) {
+            Value *RdxVal = TrackedVals.find(RV)->second;
+            // Check if the reduction value was not overriden by the
+            // extractelement instruction because of the vectorization and
+            // exclude it, if it is not compatible with other values.
+            if (auto *Inst = dyn_cast<Instruction>(RdxVal))
+              if (!NextS.getOpcode() || !NextS.isOpcodeOrAlt(Inst))
+                continue;
+            CommonCandidates.push_back(RdxVal);
+            TrackedToOrig.try_emplace(RdxVal, RV);
+          }
+          SmallVector<int> Mask;
+          if (isFixedVectorShuffle(CommonCandidates, Mask)) {
+            ++I;
+            Candidates.swap(CommonCandidates);
+            ShuffledExtracts = true;
+          }
+        }
+      }
+      unsigned NumReducedVals = Candidates.size();
+      if (NumReducedVals < ReductionLimit)
+        continue;
 
-      V.computeMinimumValueSizes();
+      unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
+      unsigned Start = 0;
+      unsigned Pos = Start;
+      // Restarts vectorization attempt with lower vector factor.
+      unsigned PrevReduxWidth = ReduxWidth;
+      bool CheckForReusedReductionOpsLocal = false;
+      auto &&AdjustReducedVals = [&Pos, &Start, &ReduxWidth, NumReducedVals,
+                                  &CheckForReusedReductionOpsLocal,
+                                  &PrevReduxWidth, &V,
+                                  &IgnoreList](bool IgnoreVL = false) {
+        bool IsAnyRedOpGathered =
+            !IgnoreVL && any_of(IgnoreList, [&V](Value *RedOp) {
+              return V.isGathered(RedOp);
+            });
+        if (!CheckForReusedReductionOpsLocal && PrevReduxWidth == ReduxWidth) {
+          // Check if any of the reduction ops are gathered. If so, worth
+          // trying again with less number of reduction ops.
+          CheckForReusedReductionOpsLocal |= IsAnyRedOpGathered;
+        }
+        ++Pos;
+        if (Pos < NumReducedVals - ReduxWidth + 1)
+          return IsAnyRedOpGathered;
+        Pos = Start;
+        ReduxWidth /= 2;
+        return IsAnyRedOpGathered;
+      };
+      while (Pos < NumReducedVals - ReduxWidth + 1 &&
+             ReduxWidth >= ReductionLimit) {
+        // Dependency in tree of the reduction ops - drop this attempt, try
+        // later.
+        if (CheckForReusedReductionOpsLocal && PrevReduxWidth != ReduxWidth &&
+            Start == 0) {
+          CheckForReusedReductionOps = true;
+          break;
+        }
+        PrevReduxWidth = ReduxWidth;
+        ArrayRef<Value *> VL(std::next(Candidates.begin(), Pos), ReduxWidth);
+        // Beeing analyzed already - skip.
+        if (V.areAnalyzedReductionVals(VL)) {
+          (void)AdjustReducedVals(/*IgnoreVL=*/true);
+          continue;
+        }
+        // Early exit if any of the reduction values were deleted during
+        // previous vectorization attempts.
+        if (any_of(VL, [&V](Value *RedVal) {
+              auto *RedValI = dyn_cast<Instruction>(RedVal);
+              if (!RedValI)
+                return false;
+              return V.isDeleted(RedValI);
+            }))
+          break;
+        V.buildTree(VL, IgnoreList);
+        if (V.isTreeTinyAndNotFullyVectorizable(/*ForReduction=*/true)) {
+          if (!AdjustReducedVals())
+            V.analyzedReductionVals(VL);
+          continue;
+        }
+        if (V.isLoadCombineReductionCandidate(RdxKind)) {
+          if (!AdjustReducedVals())
+            V.analyzedReductionVals(VL);
+          continue;
+        }
+        V.reorderTopToBottom();
+        // No need to reorder the root node at all.
+        V.reorderBottomToTop(/*IgnoreReorder=*/true);
+        // Keep extracted other reduction values, if they are used in the
+        // vectorization trees.
+        BoUpSLP::ExtraValueToDebugLocsMap LocalExternallyUsedValues(
+            ExternallyUsedValues);
+        for (unsigned Cnt = 0, Sz = ReducedVals.size(); Cnt < Sz; ++Cnt) {
+          if (Cnt == I || (ShuffledExtracts && Cnt == I - 1))
+            continue;
+          for_each(ReducedVals[Cnt],
+                   [&LocalExternallyUsedValues, &TrackedVals](Value *V) {
+                     if (isa<Instruction>(V))
+                       LocalExternallyUsedValues[TrackedVals[V]];
+                   });
+        }
+        for (unsigned Cnt = 0; Cnt < NumReducedVals; ++Cnt) {
+          if (Cnt >= Pos && Cnt < Pos + ReduxWidth)
+            continue;
+          if (VectorizedVals.count(Candidates[Cnt]))
+            continue;
+          LocalExternallyUsedValues[Candidates[Cnt]];
+        }
+        V.buildExternalUses(LocalExternallyUsedValues);
+
+        V.computeMinimumValueSizes();
+
+        // Intersect the fast-math-flags from all reduction operations.
+        FastMathFlags RdxFMF;
+        RdxFMF.set();
+        for (Value *U : IgnoreList)
+          if (auto *FPMO = dyn_cast<FPMathOperator>(U))
+            RdxFMF &= FPMO->getFastMathFlags();
+        // Estimate cost.
+        InstructionCost TreeCost = V.getTreeCost(VL);
+        InstructionCost ReductionCost =
+            getReductionCost(TTI, VL[0], ReduxWidth, RdxFMF);
+        InstructionCost Cost = TreeCost + ReductionCost;
+        if (!Cost.isValid()) {
+          LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n");
+          return nullptr;
+        }
+        if (Cost >= -SLPCostThreshold) {
+          V.getORE()->emit([&]() {
+            return OptimizationRemarkMissed(
+                       SV_NAME, "HorSLPNotBeneficial",
+                       ReducedValsToOps.find(VL[0])->second.front())
+                   << "Vectorizing horizontal reduction is possible"
+                   << "but not beneficial with cost " << ore::NV("Cost", Cost)
+                   << " and threshold "
+                   << ore::NV("Threshold", -SLPCostThreshold);
+          });
+          if (!AdjustReducedVals())
+            V.analyzedReductionVals(VL);
+          continue;
+        }
 
-      // Estimate cost.
-      InstructionCost TreeCost =
-          V.getTreeCost(makeArrayRef(&ReducedVals[i], ReduxWidth));
-      InstructionCost ReductionCost =
-          getReductionCost(TTI, ReducedVals[i], ReduxWidth, RdxFMF);
-      InstructionCost Cost = TreeCost + ReductionCost;
-      if (!Cost.isValid()) {
-        LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n");
-        return nullptr;
-      }
-      if (Cost >= -SLPCostThreshold) {
+        LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
+                          << Cost << ". (HorRdx)\n");
         V.getORE()->emit([&]() {
-          return OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial",
-                                          cast<Instruction>(VL[0]))
-                 << "Vectorizing horizontal reduction is possible"
-                 << "but not beneficial with cost " << ore::NV("Cost", Cost)
-                 << " and threshold "
-                 << ore::NV("Threshold", -SLPCostThreshold);
+          return OptimizationRemark(
+                     SV_NAME, "VectorizedHorizontalReduction",
+                     ReducedValsToOps.find(VL[0])->second.front())
+                 << "Vectorized horizontal reduction with cost "
+                 << ore::NV("Cost", Cost) << " and with tree size "
+                 << ore::NV("TreeSize", V.getTreeSize());
         });
-        break;
-      }
 
-      LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
-                        << Cost << ". (HorRdx)\n");
-      V.getORE()->emit([&]() {
-        return OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction",
-                                  cast<Instruction>(VL[0]))
-               << "Vectorized horizontal reduction with cost "
-               << ore::NV("Cost", Cost) << " and with tree size "
-               << ore::NV("TreeSize", V.getTreeSize());
-      });
+        Builder.setFastMathFlags(RdxFMF);
 
-      // Vectorize a tree.
-      DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
-      Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
+        // Vectorize a tree.
+        Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues);
 
-      // Emit a reduction. If the root is a select (min/max idiom), the insert
-      // point is the compare condition of that select.
-      Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
-      if (isCmpSelMinMax(RdxRootInst))
-        Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst));
-      else
-        Builder.SetInsertPoint(RdxRootInst);
+        // Emit a reduction. If the root is a select (min/max idiom), the insert
+        // point is the compare condition of that select.
+        Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
+        if (IsCmpSelMinMax)
+          Builder.SetInsertPoint(GetCmpForMinMaxReduction(RdxRootInst));
+        else
+          Builder.SetInsertPoint(RdxRootInst);
 
-      // To prevent poison from leaking across what used to be sequential, safe,
-      // scalar boolean logic operations, the reduction operand must be frozen.
-      if (isa<SelectInst>(RdxRootInst) && isBoolLogicOp(RdxRootInst))
-        VectorizedRoot = Builder.CreateFreeze(VectorizedRoot);
+        // To prevent poison from leaking across what used to be sequential,
+        // safe, scalar boolean logic operations, the reduction operand must be
+        // frozen.
+        if (isa<SelectInst>(RdxRootInst) && isBoolLogicOp(RdxRootInst))
+          VectorizedRoot = Builder.CreateFreeze(VectorizedRoot);
 
-      Value *ReducedSubTree =
-          emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
+        Value *ReducedSubTree =
+            emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
 
-      if (!VectorizedTree) {
-        // Initialize the final value in the reduction.
-        VectorizedTree = ReducedSubTree;
-      } else {
-        // Update the final value in the reduction.
-        Builder.SetCurrentDebugLocation(Loc);
-        VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
-                                  ReducedSubTree, "op.rdx", ReductionOps);
+        if (!VectorizedTree) {
+          // Initialize the final value in the reduction.
+          VectorizedTree = ReducedSubTree;
+        } else {
+          // Update the final value in the reduction.
+          Builder.SetCurrentDebugLocation(
+              cast<Instruction>(ReductionOps.front().front())->getDebugLoc());
+          VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
+                                    ReducedSubTree, "op.rdx", ReductionOps);
+        }
+        // Count vectorized reduced values to exclude them from final reduction.
+        for (Value *V : VL)
+          ++VectorizedVals.try_emplace(TrackedToOrig.find(V)->second, 0)
+                .first->getSecond();
+        Pos += ReduxWidth;
+        Start = Pos;
+        ReduxWidth = PowerOf2Floor(NumReducedVals - Pos);
       }
-      i += ReduxWidth;
-      ReduxWidth = PowerOf2Floor(NumReducedVals - i);
     }
-
     if (VectorizedTree) {
       // Finish the reduction.
-      for (; i < NumReducedVals; ++i) {
-        auto *I = cast<Instruction>(ReducedVals[i]);
-        Builder.SetCurrentDebugLocation(I->getDebugLoc());
-        VectorizedTree =
-            createOp(Builder, RdxKind, VectorizedTree, I, "", ReductionOps);
+      // Need to add extra arguments and not vectorized possible reduction
+      // values.
+      SmallPtrSet<Value *, 8> Visited;
+      for (unsigned I = 0, E = ReducedVals.size(); I < E; ++I) {
+        ArrayRef<Value *> Candidates = ReducedVals[I];
+        for (Value *RdxVal : Candidates) {
+          if (!Visited.insert(RdxVal).second)
+            continue;
+          Value *StableRdxVal = RdxVal;
+          auto TVIt = TrackedVals.find(RdxVal);
+          if (TVIt != TrackedVals.end())
+            StableRdxVal = TVIt->second;
+          unsigned NumOps = 0;
+          auto It = VectorizedVals.find(RdxVal);
+          if (It != VectorizedVals.end())
+            NumOps = It->second;
+          for (Instruction *RedOp :
+               makeArrayRef(ReducedValsToOps.find(RdxVal)->second)
+                   .drop_back(NumOps)) {
+            Builder.SetCurrentDebugLocation(RedOp->getDebugLoc());
+            ReductionOpsListType Ops;
+            if (auto *Sel = dyn_cast<SelectInst>(RedOp))
+              Ops.emplace_back().push_back(Sel->getCondition());
+            Ops.emplace_back().push_back(RedOp);
+            VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
+                                      StableRdxVal, "op.rdx", Ops);
+          }
+        }
       }
       for (auto &Pair : ExternallyUsedValues) {
         // Add each externally used value to the final reduction.
         for (auto *I : Pair.second) {
           Builder.SetCurrentDebugLocation(I->getDebugLoc());
+          ReductionOpsListType Ops;
+          if (auto *Sel = dyn_cast<SelectInst>(I))
+            Ops.emplace_back().push_back(Sel->getCondition());
+          Ops.emplace_back().push_back(I);
+          Value *StableRdxVal = Pair.first;
+          auto TVIt = TrackedVals.find(Pair.first);
+          if (TVIt != TrackedVals.end())
+            StableRdxVal = TVIt->second;
           VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
-                                    Pair.first, "op.extra", I);
+                                    StableRdxVal, "op.rdx", Ops);
         }
       }
 
@@ -9922,20 +10231,30 @@ public:
       // deletion.
 #ifndef NDEBUG
       SmallSet<Value *, 4> IgnoreSet;
-      IgnoreSet.insert(IgnoreList.begin(), IgnoreList.end());
+      for (ArrayRef<Value *> RdxOps : ReductionOps)
+        IgnoreSet.insert(RdxOps.begin(), RdxOps.end());
 #endif
-      for (auto *Ignore : IgnoreList) {
+      for (ArrayRef<Value *> RdxOps : ReductionOps) {
+        for (Value *Ignore : RdxOps) {
+          if (!Ignore)
+            continue;
 #ifndef NDEBUG
-        for (auto *U : Ignore->users()) {
-          assert(IgnoreSet.count(U));
-        }
+          for (auto *U : Ignore->users()) {
+            assert(IgnoreSet.count(U) &&
+                   "All users must be either in the reduction ops list.");
+          }
 #endif
-        if (!Ignore->use_empty()) {
-          Value *Undef = UndefValue::get(Ignore->getType());
-          Ignore->replaceAllUsesWith(Undef);
+          if (!Ignore->use_empty()) {
+            Value *Undef = UndefValue::get(Ignore->getType());
+            Ignore->replaceAllUsesWith(Undef);
+          }
+          V.eraseInstruction(cast<Instruction>(Ignore));
         }
-        V.eraseInstruction(cast<Instruction>(Ignore));
       }
+    } else if (!CheckForReusedReductionOps) {
+      for (ReductionOpsType &RdxOps : ReductionOps)
+        for (Value *RdxOp : RdxOps)
+          V.analyzedReductionRoot(cast<Instruction>(RdxOp));
     }
     return VectorizedTree;
   }
@@ -10201,7 +10520,8 @@ static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) {
 /// performed.
 static bool tryToVectorizeHorReductionOrInstOperands(
     PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
-    TargetTransformInfo *TTI,
+    TargetTransformInfo *TTI, ScalarEvolution &SE, const DataLayout &DL,
+    const TargetLibraryInfo &TLI,
     const function_ref<bool(Instruction *, BoUpSLP &)> Vectorize) {
   if (!ShouldVectorizeHor)
     return false;
@@ -10220,7 +10540,7 @@ static bool tryToVectorizeHorReductionOrInstOperands(
   // horizontal reduction.
   // Interrupt the process if the Root instruction itself was vectorized or all
   // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized.
-  // Skip the analysis of CmpInsts.Compiler implements postanalysis of the
+  // Skip the analysis of CmpInsts. Compiler implements postanalysis of the
   // CmpInsts so we can skip extra attempts in
   // tryToVectorizeHorReductionOrInstOperands and save compile time.
   std::queue<std::pair<Instruction *, unsigned>> Stack;
@@ -10228,13 +10548,16 @@ static bool tryToVectorizeHorReductionOrInstOperands(
   SmallPtrSet<Value *, 8> VisitedInstrs;
   SmallVector<WeakTrackingVH> PostponedInsts;
   bool Res = false;
-  auto &&TryToReduce = [TTI, &P, &R](Instruction *Inst, Value *&B0,
-                                     Value *&B1) -> Value * {
+  auto &&TryToReduce = [TTI, &SE, &DL, &P, &R, &TLI](Instruction *Inst,
+                                                     Value *&B0,
+                                                     Value *&B1) -> Value * {
+    if (R.isAnalizedReductionRoot(Inst))
+      return nullptr;
     bool IsBinop = matchRdxBop(Inst, B0, B1);
     bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
     if (IsBinop || IsSelect) {
       HorizontalReduction HorRdx;
-      if (HorRdx.matchAssociativeReduction(P, Inst))
+      if (HorRdx.matchAssociativeReduction(P, Inst, SE, DL, TLI))
         return HorRdx.tryToReduce(R, TTI);
     }
     return nullptr;
@@ -10279,7 +10602,7 @@ static bool tryToVectorizeHorReductionOrInstOperands(
       // Do not try to vectorize CmpInst operands, this is done separately.
       // Final attempt for binop args vectorization should happen after the loop
       // to try to find reductions.
-      if (!isa<CmpInst>(Inst))
+      if (!isa<CmpInst, InsertElementInst, InsertValueInst>(Inst))
         PostponedInsts.push_back(Inst);
     }
 
@@ -10292,8 +10615,8 @@ static bool tryToVectorizeHorReductionOrInstOperands(
           if (auto *I = dyn_cast<Instruction>(Op))
             // Do not try to vectorize CmpInst operands,  this is done
             // separately.
-            if (!isa<PHINode>(I) && !isa<CmpInst>(I) && !R.isDeleted(I) &&
-                I->getParent() == BB)
+            if (!isa<PHINode, CmpInst, InsertElementInst, InsertValueInst>(I) &&
+                !R.isDeleted(I) && I->getParent() == BB)
               Stack.emplace(I, Level);
   }
   // Try to vectorized binops where reductions were not found.
@@ -10317,8 +10640,8 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
   auto &&ExtraVectorization = [this](Instruction *I, BoUpSLP &R) -> bool {
     return tryToVectorize(I, R);
   };
-  return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI,
-                                                  ExtraVectorization);
+  return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI, *SE, *DL,
+                                                  *TLI, ExtraVectorization);
 }
 
 bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
@@ -10486,12 +10809,16 @@ bool SLPVectorizerPass::vectorizeSimpleInstructions(
   for (auto *I : reverse(Instructions)) {
     if (R.isDeleted(I))
       continue;
-    if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
+    if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I)) {
       OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
-    else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I))
+    } else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I)) {
       OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R);
-    else if (isa<CmpInst>(I))
+    } else if (isa<CmpInst>(I)) {
       PostponedCmps.push_back(I);
+      continue;
+    }
+    // Try to find reductions in buildvector sequnces.
+    OpsChanged |= vectorizeRootInstruction(nullptr, I, BB, R, TTI);
   }
   if (AtTerminator) {
     // Try to find reductions first.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 02550dad..21bd231 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1420,6 +1420,9 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
     getCondOp()->printAsOperand(O, SlotTracker);
   }
   O << ")";
+  if (RdxDesc->IntermediateStore)
+    O << " (with final reduction value stored in invariant address sank "
+         "outside of loop)";
 }
 
 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 05fc8c6..f26babe 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -257,12 +257,12 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
 ExtractElementInst *VectorCombine::getShuffleExtract(
     ExtractElementInst *Ext0, ExtractElementInst *Ext1,
     unsigned PreferredExtractIndex = InvalidIndex) const {
-  assert(isa<ConstantInt>(Ext0->getIndexOperand()) &&
-         isa<ConstantInt>(Ext1->getIndexOperand()) &&
-         "Expected constant extract indexes");
+  auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
+  auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
+  assert(Index0C && Index1C && "Expected constant extract indexes");
 
-  unsigned Index0 = cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue();
-  unsigned Index1 = cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue();
+  unsigned Index0 = Index0C->getZExtValue();
+  unsigned Index1 = Index1C->getZExtValue();
 
   // If the extract indexes are identical, no shuffle is needed.
   if (Index0 == Index1)
@@ -308,9 +308,10 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                           const Instruction &I,
                                           ExtractElementInst *&ConvertToShuffle,
                                           unsigned PreferredExtractIndex) {
-  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
-         isa<ConstantInt>(Ext1->getOperand(1)) &&
-         "Expected constant extract indexes");
+  auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getOperand(1));
+  auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getOperand(1));
+  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
+
   unsigned Opcode = I.getOpcode();
   Type *ScalarTy = Ext0->getType();
   auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
@@ -333,8 +334,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
 
   // Get cost estimates for the extract elements. These costs will factor into
   // both sequences.
-  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
-  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();
+  unsigned Ext0Index = Ext0IndexC->getZExtValue();
+  unsigned Ext1Index = Ext1IndexC->getZExtValue();
 
   InstructionCost Extract0Cost =
       TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
diff --git a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
index 1f5844b..c7635e8 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
@@ -5,105 +5,105 @@
 define void @casts() {
 ; CHECK-LABEL: 'casts'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 180 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 107 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 360 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 368 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
@@ -275,51 +275,51 @@ define void @fp16() {
 ;
 ; CHECK-FP16-LABEL: 'fp16'
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 109 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found an estimated cost of 400 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll
index ca4f51f..10973bb 100644
--- a/llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll
@@ -5,57 +5,63 @@
 ; Verify the cost model for reverse shuffles.
 ;
 
-define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128) {
-; CHECK-LABEL: 'test_vXi32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+;; Reverse shuffles should be lowered to vrev and possibly a vext (for quadwords, on neon)
+define void @reverse() {
+; CHECK-LABEL: 'reverse'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-  %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  ret void
-}
+  %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+  %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 
-define void @test_vXi64(<2 x i64> %src128) {
-; CHECK-LABEL: 'test_vXi64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-  ret void
-}
+  %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+  %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 
-define void @test_vXf32(<2 x float> %src64, <4 x float> %src128) {
-; CHECK-LABEL: 'test_vXf32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-  %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  ret void
-}
+  %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+  %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 
-define void @test_vXf64(<2 x double> %src128) {
-; CHECK-LABEL: 'test_vXf64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-  ret void
-}
+  %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+  %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 
-;
-; Tests the cost model for reverse shuffles of second operand.
-;
+  %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+  %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+  %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+  %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 
-define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128) {
-; CHECK-LABEL: 'test_upper_vXf32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2>
-  %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
   ret void
 }
+
+
diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index 1859309..d0c987f 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -281,7 +281,7 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x
 
 define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) {
 ; THRU-LABEL: 'maskedgather'
-; THRU-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+; THRU-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; LATE-LABEL: 'maskedgather'
@@ -302,7 +302,7 @@ define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) {
 
 define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) {
 ; THRU-LABEL: 'maskedscatter'
-; THRU-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
+; THRU-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; LATE-LABEL: 'maskedscatter'
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
index 36d027b..0dc3f98 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
@@ -882,85 +882,139 @@ define i32 @masked_expandload() {
 
 define i32 @masked_compressstore() {
 ; SSE2-LABEL: 'masked_compressstore'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SSE42-LABEL: 'masked_compressstore'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
-; AVX-LABEL: 'masked_compressstore'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+; AVX1-LABEL: 'masked_compressstore'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+;
+; AVX2-LABEL: 'masked_compressstore'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+;
+; SKL-LABEL: 'masked_compressstore'
+; SKL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX512-LABEL: 'masked_compressstore'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
@@ -1242,19 +1296,19 @@ define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
 
 define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0)  {
 ; SSE2-LABEL: 'test_gather_2f64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
 ;
 ; SSE42-LABEL: 'test_gather_2f64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
 ;
 ; AVX1-LABEL: 'test_gather_2f64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
 ;
 ; AVX2-LABEL: 'test_gather_2f64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
 ;
 ; SKL-LABEL: 'test_gather_2f64'
@@ -1271,19 +1325,19 @@ define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x
 
 define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0)  {
 ; SSE2-LABEL: 'test_gather_4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 ; SSE42-LABEL: 'test_gather_4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 ; AVX1-LABEL: 'test_gather_4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 ; AVX2-LABEL: 'test_gather_4i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 ; SKL-LABEL: 'test_gather_4i32'
@@ -1383,25 +1437,25 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <
 ; SSE2-LABEL: 'test_gather_16f32_var_mask'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; SSE42-LABEL: 'test_gather_16f32_var_mask'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; AVX1-LABEL: 'test_gather_16f32_var_mask'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; AVX2-LABEL: 'test_gather_16f32_var_mask'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; SKL-LABEL: 'test_gather_16f32_var_mask'
@@ -1427,25 +1481,25 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3
 ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; SSE42-LABEL: 'test_gather_16f32_ra_var_mask'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; SKL-LABEL: 'test_gather_16f32_ra_var_mask'
@@ -1532,7 +1586,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'test_scatter_16i32'
@@ -1540,7 +1594,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'test_scatter_16i32'
@@ -1548,7 +1602,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'test_scatter_16i32'
@@ -1556,7 +1610,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SKL-LABEL: 'test_scatter_16i32'
@@ -1564,7 +1618,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer
 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; SKL-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'test_scatter_16i32'
@@ -1586,15 +1640,15 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 
 define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
 ; SSE2-LABEL: 'test_scatter_8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'test_scatter_8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX-LABEL: 'test_scatter_8i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'test_scatter_8i32'
@@ -1607,15 +1661,15 @@ define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
 
 define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
 ; SSE2-LABEL: 'test_scatter_4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'test_scatter_4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX-LABEL: 'test_scatter_4i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; KNL-LABEL: 'test_scatter_4i32'
@@ -1634,25 +1688,25 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask)
 ; SSE2-LABEL: 'test_gather_4f32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
 ;
 ; SSE42-LABEL: 'test_gather_4f32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
 ;
 ; AVX1-LABEL: 'test_gather_4f32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
 ;
 ; AVX2-LABEL: 'test_gather_4f32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
 ;
 ; SKL-LABEL: 'test_gather_4f32'
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
index a6fb78f..02a279e 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -1266,85 +1266,139 @@ define i32 @masked_expandload() {
 
 define i32 @masked_compressstore() {
 ; SSE2-LABEL: 'masked_compressstore'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SSE42-LABEL: 'masked_compressstore'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
-; AVX-LABEL: 'masked_compressstore'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+; AVX1-LABEL: 'masked_compressstore'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+;
+; AVX2-LABEL: 'masked_compressstore'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+;
+; SKL-LABEL: 'masked_compressstore'
+; SKL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX512-LABEL: 'masked_compressstore'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
@@ -1626,19 +1680,19 @@ define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
 
 define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0)  {
 ; SSE2-LABEL: 'test_gather_2f64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
 ;
 ; SSE42-LABEL: 'test_gather_2f64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
 ;
 ; AVX1-LABEL: 'test_gather_2f64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
 ;
 ; AVX2-LABEL: 'test_gather_2f64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
 ;
 ; SKL-LABEL: 'test_gather_2f64'
@@ -1655,19 +1709,19 @@ define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x
 
 define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0)  {
 ; SSE2-LABEL: 'test_gather_4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 ; SSE42-LABEL: 'test_gather_4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 ; AVX1-LABEL: 'test_gather_4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 ; AVX2-LABEL: 'test_gather_4i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 ; SKL-LABEL: 'test_gather_4i32'
@@ -1767,25 +1821,25 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <
 ; SSE2-LABEL: 'test_gather_16f32_var_mask'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; SSE42-LABEL: 'test_gather_16f32_var_mask'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; AVX1-LABEL: 'test_gather_16f32_var_mask'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; AVX2-LABEL: 'test_gather_16f32_var_mask'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; SKL-LABEL: 'test_gather_16f32_var_mask'
@@ -1811,25 +1865,25 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3
 ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; SSE42-LABEL: 'test_gather_16f32_ra_var_mask'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
 ;
 ; SKL-LABEL: 'test_gather_16f32_ra_var_mask'
@@ -1916,7 +1970,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'test_scatter_16i32'
@@ -1924,7 +1978,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'test_scatter_16i32'
@@ -1932,7 +1986,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'test_scatter_16i32'
@@ -1940,7 +1994,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SKL-LABEL: 'test_scatter_16i32'
@@ -1948,7 +2002,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; SKL-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'test_scatter_16i32'
@@ -1970,15 +2024,15 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 
 define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
 ; SSE2-LABEL: 'test_scatter_8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'test_scatter_8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX-LABEL: 'test_scatter_8i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'test_scatter_8i32'
@@ -1991,15 +2045,15 @@ define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
 
 define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
 ; SSE2-LABEL: 'test_scatter_4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'test_scatter_4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX-LABEL: 'test_scatter_4i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; KNL-LABEL: 'test_scatter_4i32'
@@ -2018,25 +2072,25 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask)
 ; SSE2-LABEL: 'test_gather_4f32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
 ;
 ; SSE42-LABEL: 'test_gather_4f32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
 ;
 ; AVX1-LABEL: 'test_gather_4f32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
 ;
 ; AVX2-LABEL: 'test_gather_4f32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
 ;
 ; SKL-LABEL: 'test_gather_4f32'
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll
index afb73f4..da38a10 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll
@@ -4,8 +4,8 @@
 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41
 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
-; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
+; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512
 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256
 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQVEC512
@@ -17,70 +17,81 @@
 
 define void @replication_i1_stride2() nounwind "min-legal-vector-width"="256" {
 ; SSE2-LABEL: 'replication_i1_stride2'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 624 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE3-LABEL: 'replication_i1_stride2'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 624 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSSE3-LABEL: 'replication_i1_stride2'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 624 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE41-LABEL: 'replication_i1_stride2'
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'replication_i1_stride2'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; AVX-LABEL: 'replication_i1_stride2'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 200 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 400 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-LABEL: 'replication_i1_stride2'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'replication_i1_stride2'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 138 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 276 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512FVEC512-LABEL: 'replication_i1_stride2'
 ; AVX512FVEC512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer
@@ -183,70 +194,81 @@ define void @replication_i1_stride2() nounwind "min-legal-vector-width"="256" {
 
 define void @replication_i1_stride3() nounwind "min-legal-vector-width"="256" {
 ; SSE2-LABEL: 'replication_i1_stride3'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 496 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE3-LABEL: 'replication_i1_stride3'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 496 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSSE3-LABEL: 'replication_i1_stride3'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 496 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE41-LABEL: 'replication_i1_stride3'
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 196 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 392 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'replication_i1_stride3'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 196 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 392 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; AVX-LABEL: 'replication_i1_stride3'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 268 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 536 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-LABEL: 'replication_i1_stride3'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 416 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'replication_i1_stride3'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 206 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 412 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512FVEC512-LABEL: 'replication_i1_stride3'
 ; AVX512FVEC512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer
@@ -349,70 +371,81 @@ define void @replication_i1_stride3() nounwind "min-legal-vector-width"="256" {
 
 define void @replication_i1_stride4() nounwind "min-legal-vector-width"="256" {
 ; SSE2-LABEL: 'replication_i1_stride4'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 560 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1120 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE3-LABEL: 'replication_i1_stride4'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 560 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1120 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSSE3-LABEL: 'replication_i1_stride4'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 560 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1120 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE41-LABEL: 'replication_i1_stride4'
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 260 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 520 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'replication_i1_stride4'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 260 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 520 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; AVX-LABEL: 'replication_i1_stride4'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 336 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 672 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-LABEL: 'replication_i1_stride4'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 138 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 276 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 552 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'replication_i1_stride4'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 274 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 548 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512FVEC512-LABEL: 'replication_i1_stride4'
 ; AVX512FVEC512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer
@@ -515,70 +548,81 @@ define void @replication_i1_stride4() nounwind "min-legal-vector-width"="256" {
 
 define void @replication_i1_stride5() nounwind "min-legal-vector-width"="256" {
 ; SSE2-LABEL: 'replication_i1_stride5'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 432 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 864 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1728 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE3-LABEL: 'replication_i1_stride5'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 432 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 864 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1728 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSSE3-LABEL: 'replication_i1_stride5'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 432 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 864 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1728 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE41-LABEL: 'replication_i1_stride5'
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 162 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 324 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'replication_i1_stride5'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 162 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 324 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; AVX-LABEL: 'replication_i1_stride5'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 101 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 404 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-LABEL: 'replication_i1_stride5'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 344 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 688 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'replication_i1_stride5'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 171 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 342 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 684 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512FVEC512-LABEL: 'replication_i1_stride5'
 ; AVX512FVEC512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer
@@ -681,70 +725,81 @@ define void @replication_i1_stride5() nounwind "min-legal-vector-width"="256" {
 
 define void @replication_i1_stride6() nounwind "min-legal-vector-width"="256" {
 ; SSE2-LABEL: 'replication_i1_stride6'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 232 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 464 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 928 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1856 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE3-LABEL: 'replication_i1_stride6'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 232 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 464 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 928 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1856 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSSE3-LABEL: 'replication_i1_stride6'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 232 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 464 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 928 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1856 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE41-LABEL: 'replication_i1_stride6'
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 97 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 194 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 388 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 776 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'replication_i1_stride6'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 97 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 194 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 388 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 776 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; AVX-LABEL: 'replication_i1_stride6'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 236 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 472 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 944 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-LABEL: 'replication_i1_stride6'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 206 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 412 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 824 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'replication_i1_stride6'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 410 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 820 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512FVEC512-LABEL: 'replication_i1_stride6'
 ; AVX512FVEC512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer
@@ -847,70 +902,81 @@ define void @replication_i1_stride6() nounwind "min-legal-vector-width"="256" {
 
 define void @replication_i1_stride7() nounwind "min-legal-vector-width"="256" {
 ; SSE2-LABEL: 'replication_i1_stride7'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 496 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 992 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1984 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE3-LABEL: 'replication_i1_stride7'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 496 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 992 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1984 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSSE3-LABEL: 'replication_i1_stride7'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 496 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 992 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1984 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE41-LABEL: 'replication_i1_stride7'
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 113 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 226 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'replication_i1_stride7'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 113 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 226 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; AVX-LABEL: 'replication_i1_stride7'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 270 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 540 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1080 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-LABEL: 'replication_i1_stride7'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'replication_i1_stride7'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 239 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 478 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 956 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512FVEC512-LABEL: 'replication_i1_stride7'
 ; AVX512FVEC512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer
@@ -1013,70 +1079,81 @@ define void @replication_i1_stride7() nounwind "min-legal-vector-width"="256" {
 
 define void @replication_i1_stride8() nounwind "min-legal-vector-width"="256" {
 ; SSE2-LABEL: 'replication_i1_stride8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1056 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2112 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE3-LABEL: 'replication_i1_stride8'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1056 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2112 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSSE3-LABEL: 'replication_i1_stride8'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1056 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2112 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE41-LABEL: 'replication_i1_stride8'
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 258 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 516 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1032 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'replication_i1_stride8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 258 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 516 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1032 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; AVX-LABEL: 'replication_i1_stride8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 304 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1216 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-LABEL: 'replication_i1_stride8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 274 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 548 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1096 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'replication_i1_stride8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 273 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 546 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1092 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512FVEC512-LABEL: 'replication_i1_stride8'
 ; AVX512FVEC512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll
index f947106..bb868b5 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll
@@ -6,8 +6,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
 ; Check delinearization in loop cache analysis can handle fixed-size arrays.
 ; The IR is copied from llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll
 
-; CHECK-DAG: Loop 'for.body' has cost = 4186116
-; CHECK-DAG: Loop 'for.body4' has cost = 128898
+; CHECK: Loop 'for.body' has cost = 4186116
+; CHECK: Loop 'for.body4' has cost = 128898
 
 ;; #define N 1024
 ;; #define M 2048
@@ -48,8 +48,8 @@ for.end13:                                        ; preds = %for.inc11
 }
 
 
-; CHECK-DAG: Loop 'for.body' has cost = 4186116
-; CHECK-DAG: Loop 'for.body4' has cost = 128898
+; CHECK: Loop 'for.body' has cost = 4186116
+; CHECK: Loop 'for.body4' has cost = 128898
 
 define void @t2([2048 x i32]* %a) {
 entry:
@@ -83,14 +83,13 @@ for.end13:                                        ; preds = %for.inc11
 
 declare [2048 x i32]* @func_with_returned_arg([2048 x i32]* returned %arg)
 
-; CHECK-DAG: Loop 'for.body' has cost = 4472886244958208
-; CHECK-DAG: Loop 'for.body4' has cost = 4472886244958208
-; CHECK-DAG: Loop 'for.body8' has cost = 4472886244958208
-; CHECK-DAG: Loop 'for.body12' has cost = 4472886244958208
-; CHECK-DAG: Loop 'for.body16' has cost = 137728168833024
+; CHECK: Loop 'for.body' has cost = 2112128815104000000
+; CHECK: Loop 'for.body4' has cost = 16762927104000000
+; CHECK: Loop 'for.body8' has cost = 130960368000000
+; CHECK: Loop 'for.body12' has cost = 1047682944000
+; CHECK: Loop 'for.body16' has cost = 32260032000
 
-
-;; #define N 1024
+;; #define N 128
 ;; #define M 2048
 ;; void t3(int a[][N][N][N][M]) {
 ;;   for (int i1 = 0; i1 < N-1; ++i1)
@@ -101,7 +100,7 @@ declare [2048 x i32]* @func_with_returned_arg([2048 x i32]* returned %arg)
 ;;             a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2];
 ;; }
 
-define void @t3([1024 x [1024 x [1024 x [2048 x i32]]]]* %a) {
+define void @t3([128 x [128 x [128 x [2048 x i32]]]]* %a) {
 entry:
   br label %for.body
 
@@ -127,9 +126,9 @@ for.body16:                                       ; preds = %for.body12, %for.bo
   %1 = add nsw i64 %indvars.iv14, -2
   %2 = add nsw i64 %indvars.iv7, -3
   %3 = add nuw nsw i64 %indvars.iv, 2
-  %arrayidx26 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %0, i64 %1, i64 %indvars.iv11, i64 %2, i64 %3
+  %arrayidx26 = getelementptr inbounds [128 x [128 x [128 x [2048 x i32]]]], [128 x [128 x [128 x [2048 x i32]]]]* %a, i64 %0, i64 %1, i64 %indvars.iv11, i64 %2, i64 %3
   %4 = load i32, i32* %arrayidx26, align 4
-  %arrayidx36 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %indvars.iv18, i64 %indvars.iv14, i64 %indvars.iv11, i64 %indvars.iv7, i64 %indvars.iv
+  %arrayidx36 = getelementptr inbounds [128 x [128 x [128 x [2048 x i32]]]], [128 x [128 x [128 x [2048 x i32]]]]* %a, i64 %indvars.iv18, i64 %indvars.iv14, i64 %indvars.iv11, i64 %indvars.iv7, i64 %indvars.iv
   store i32 %4, i32* %arrayidx36, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp ne i64 %indvars.iv.next, 2046
@@ -137,22 +136,22 @@ for.body16:                                       ; preds = %for.body12, %for.bo
 
 for.inc37:                                        ; preds = %for.body16
   %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
-  %exitcond10 = icmp ne i64 %indvars.iv.next8, 1024
+  %exitcond10 = icmp ne i64 %indvars.iv.next8, 128
   br i1 %exitcond10, label %for.body12, label %for.inc40
 
 for.inc40:                                        ; preds = %for.inc37
   %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1
-  %exitcond13 = icmp ne i64 %indvars.iv.next12, 1024
+  %exitcond13 = icmp ne i64 %indvars.iv.next12, 128
   br i1 %exitcond13, label %for.body8, label %for.inc43
 
 for.inc43:                                        ; preds = %for.inc40
   %indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1
-  %exitcond17 = icmp ne i64 %indvars.iv.next15, 1024
+  %exitcond17 = icmp ne i64 %indvars.iv.next15, 128
   br i1 %exitcond17, label %for.body4, label %for.inc46
 
 for.inc46:                                        ; preds = %for.inc43
   %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
-  %exitcond21 = icmp ne i64 %indvars.iv.next19, 1023
+  %exitcond21 = icmp ne i64 %indvars.iv.next19, 127
   br i1 %exitcond21, label %for.body, label %for.end48
 
 for.end48:                                        ; preds = %for.inc46
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll
index b73abca..c5c9e76 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll
@@ -10,9 +10,9 @@ target triple = "powerpc64le-unknown-linux-gnu"
 ;         A[i][k][j] += B[i][k][j] + C[i][j][k];
 ; }
 
-; CHECK-DAG: Loop 'for.i' has cost = 3000000
-; CHECK-DAG: Loop 'for.k' has cost = 2030000
-; CHECK-DAG: Loop 'for.j' has cost = 1060000
+; CHECK: Loop 'for.i' has cost = 3000000
+; CHECK: Loop 'for.k' has cost = 2030000
+; CHECK: Loop 'for.j' has cost = 1060000
 
 define void @foo(i64 %n, i64 %m, i64 %o, i32* %A, i32* %B, i32* %C) {
 entry:
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll
index c7342fe..6679b19 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll
@@ -10,9 +10,9 @@ target triple = "powerpc64le-unknown-linux-gnu"
 ;         C[i][j] = C[i][j] + A[i][k] * B[k][j];
 ; }
 
-; CHECK-DAG:Loop 'for.i' has cost = 2010000
-; CHECK-DAG:Loop 'for.k' has cost = 1040000
-; CHECK-DAG:Loop 'for.j' has cost = 70000
+; CHECK:Loop 'for.i' has cost = 2010000
+; CHECK:Loop 'for.k' has cost = 1040000
+; CHECK:Loop 'for.j' has cost = 70000
     
 define void @matmul(i64 %n, i64 %m, i64 %o, i32* %A, i32* %B, i32* %C) {
 entry:
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll
index 3137952..2b317c0 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll
@@ -14,11 +14,11 @@ target triple = "powerpc64le-unknown-linux-gnu"
 ;                 y[k+1][j][i][l] = y[k+1][j][i][l] + b[k][j][i][m][l]*x[k][j][i][m]
 ; }
 
-; CHECK-DAG: Loop 'k_loop' has cost = 30000000000
-; CHECK-DAG: Loop 'j_loop' has cost = 30000000000
-; CHECK-DAG: Loop 'i_loop' has cost = 30000000000
-; CHECK-DAG: Loop 'm_loop' has cost = 10700000000
-; CHECK-DAG: Loop 'l_loop' has cost = 1300000000
+; CHECK: Loop 'k_loop' has cost = 10200000000000000
+; CHECK: Loop 'j_loop' has cost = 102000000000000
+; CHECK: Loop 'i_loop' has cost = 1020000000000
+; CHECK: Loop 'm_loop' has cost = 10700000000
+; CHECK: Loop 'l_loop' has cost = 1300000000
 
 %_elem_type_of_double = type <{ double }>
 
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/multi-store.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/multi-store.ll
new file mode 100644
index 0000000..89d959e
--- /dev/null
+++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/multi-store.ll
@@ -0,0 +1,102 @@
+; RUN: opt < %s  -opaque-pointers -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; CHECK-DAG: Loop 'for.j' has cost = 201000000
+; CHECK-DAG: Loop 'for.i' has cost = 102000000
+; CHECK-DAG: Loop 'for.k' has cost = 90000
+
+;; Test to make sure when we have multiple conflicting access patterns, the 
+;; chosen loop configuration favours the majority of those accesses. 
+;; For example this nest should be ordered as j-i-k.
+;;  for (int i = 0; i < n; i++)
+;;    for (int j = 0; j < n; j++)
+;;      for (int k = 0; k < n; k++) {
+;;        A[i][j][k] = 1;
+;;        B[j][i][k] = 2;
+;;        C[j][i][k] = 3;
+;;      }                            
+
+define void @foo(i32 noundef signext %n, ptr noalias noundef %A, ptr noalias noundef %B, ptr noalias noundef %C) {
+entry:
+  %0 = zext i32 %n to i64
+  %1 = zext i32 %n to i64
+  %2 = zext i32 %n to i64
+  %3 = zext i32 %n to i64
+  %4 = zext i32 %n to i64
+  %5 = zext i32 %n to i64
+  %cmp5 = icmp sgt i32 %n, 0
+  br i1 %cmp5, label %for.i.preheader, label %for.end30
+
+for.i.preheader:                               ; preds = %entry
+  %wide.trip.count16 = zext i32 %n to i64
+  br label %for.i
+
+for.i:                                         ; preds = %for.i.preheader, %for.inc28
+  %indvars.iv13 = phi i64 [ 0, %for.i.preheader ], [ %indvars.iv.next14, %for.inc28 ]
+  %cmp23 = icmp sgt i32 %n, 0
+  br i1 %cmp23, label %for.j.preheader, label %for.inc28
+
+for.j.preheader:                              ; preds = %for.i
+  %wide.trip.count11 = zext i32 %n to i64
+  br label %for.j
+
+for.j:                                        ; preds = %for.j.preheader, %for.inc25
+  %indvars.iv8 = phi i64 [ 0, %for.j.preheader ], [ %indvars.iv.next9, %for.inc25 ]
+  %cmp61 = icmp sgt i32 %n, 0
+  br i1 %cmp61, label %for.k.preheader, label %for.inc25
+
+for.k.preheader:                              ; preds = %for.j
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.k
+
+for.k:                                        ; preds = %for.k.preheader, %for.k
+  %indvars.iv = phi i64 [ 0, %for.k.preheader ], [ %indvars.iv.next, %for.k ]
+  %6 = mul nuw i64 %0, %1
+  %7 = mul nsw i64 %6, %indvars.iv13
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %7
+  %8 = mul nuw nsw i64 %indvars.iv8, %1
+  %arrayidx10 = getelementptr inbounds i32, ptr %arrayidx, i64 %8
+  %arrayidx12 = getelementptr inbounds i32, ptr %arrayidx10, i64 %indvars.iv
+  store i32 1, ptr %arrayidx12, align 4
+  %9 = mul nuw i64 %2, %3
+  %10 = mul nsw i64 %9, %indvars.iv8
+  %arrayidx14 = getelementptr inbounds i32, ptr %B, i64 %10
+  %11 = mul nuw nsw i64 %indvars.iv13, %3
+  %arrayidx16 = getelementptr inbounds i32, ptr %arrayidx14, i64 %11
+  %arrayidx18 = getelementptr inbounds i32, ptr %arrayidx16, i64 %indvars.iv
+  store i32 2, ptr %arrayidx18, align 4
+  %12 = mul nuw i64 %4, %5
+  %13 = mul nsw i64 %12, %indvars.iv8
+  %arrayidx20 = getelementptr inbounds i32, ptr %C, i64 %13
+  %14 = mul nuw nsw i64 %indvars.iv13, %5
+  %arrayidx22 = getelementptr inbounds i32, ptr %arrayidx20, i64 %14
+  %arrayidx24 = getelementptr inbounds i32, ptr %arrayidx22, i64 %indvars.iv
+  store i32 3, ptr %arrayidx24, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.k, label %for.inc25.loopexit
+
+for.inc25.loopexit:                               ; preds = %for.k
+  br label %for.inc25
+
+for.inc25:                                        ; preds = %for.inc25.loopexit, %for.j
+  %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1
+  %exitcond12 = icmp ne i64 %indvars.iv.next9, %wide.trip.count11
+  br i1 %exitcond12, label %for.j, label %for.inc28.loopexit
+
+for.inc28.loopexit:                               ; preds = %for.inc25
+  br label %for.inc28
+
+for.inc28:                                        ; preds = %for.inc28.loopexit, %for.i
+  %indvars.iv.next14 = add nuw nsw i64 %indvars.iv13, 1
+  %exitcond17 = icmp ne i64 %indvars.iv.next14, %wide.trip.count16
+  br i1 %exitcond17, label %for.i, label %for.end30.loopexit
+
+for.end30.loopexit:                               ; preds = %for.inc28
+  br label %for.end30
+
+for.end30:                                        ; preds = %for.end30.loopexit, %entry
+  ret void
+}
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll
index cce6731..8aa247b 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll
@@ -10,9 +10,9 @@ target triple = "powerpc64le-unknown-linux-gnu"
 ;         A[2*i+3][3*j-4][2*k+7] = 1;
 ; }
 
-; CHECK-DAG: Loop 'for.i' has cost = 1000000
-; CHECK-DAG: Loop 'for.j' has cost = 1000000
-; CHECK-DAG: Loop 'for.k' has cost = 60000
+; CHECK: Loop 'for.i' has cost = 100000000
+; CHECK: Loop 'for.j' has cost = 1000000
+; CHECK: Loop 'for.k' has cost = 60000
 
 define void @foo(i64 %n, i64 %m, i64 %o, i32* %A) {
 entry:
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll
index 1f15154..f2549f4 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll
@@ -11,8 +11,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
 ;     }   
 ; }
 
-; CHECK-DAG: Loop 'for.i' has cost = 20600
-; CHECK-DAG: Loop 'for.j' has cost = 800
+; CHECK: Loop 'for.i' has cost = 20600
+; CHECK: Loop 'for.j' has cost = 800
 
 define void @foo(i64 %n, i64 %m, i32* %A, i32* %B, i32* %C) {
 entry:
diff --git a/llvm/test/CodeGen/AArch64/cpus.ll b/llvm/test/CodeGen/AArch64/cpus.ll
index 88330b6..9167f12 100644
--- a/llvm/test/CodeGen/AArch64/cpus.ll
+++ b/llvm/test/CodeGen/AArch64/cpus.ll
@@ -34,6 +34,7 @@
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=tsv110 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=apple-latest 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=a64fx 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=ampere1 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
 
 ; CHECK-NOT: {{.*}}  is not a recognized processor for this target
diff --git a/llvm/test/CodeGen/AArch64/float-conv-elim.ll b/llvm/test/CodeGen/AArch64/float-conv-elim.ll
new file mode 100644
index 0000000..5442125
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/float-conv-elim.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s
+
+define i32 @s32_f32_s24_s32(i32 %a) {
+; CHECK-LABEL: s32_f32_s24_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sbfx w0, w0, #0, #24
+; CHECK-NEXT:    ret
+  %f = sitofp i32 %a to float
+  %i = fptosi float %f to i24
+  %r = sext i24 %i to i32
+  ret i32 %r
+}
+
+define i32 @s32_f32_u24_u32(i32 %a) {
+; CHECK-LABEL: s32_f32_u24_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w0, w0, #0xffffff
+; CHECK-NEXT:    ret
+  %f = sitofp i32 %a to float
+  %i = fptoui float %f to i24
+  %r = zext i24 %i to i32
+  ret i32 %r
+}
+
+define i32 @u32_f32_s24_s32(i32 %a) {
+; CHECK-LABEL: u32_f32_s24_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sbfx w0, w0, #0, #24
+; CHECK-NEXT:    ret
+  %f = uitofp i32 %a to float
+  %i = fptosi float %f to i24
+  %r = sext i24 %i to i32
+  ret i32 %r
+}
+
+define i32 @u32_f32_u24_u32(i32 %a) {
+; CHECK-LABEL: u32_f32_u24_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w0, w0, #0xffffff
+; CHECK-NEXT:    ret
+  %f = uitofp i32 %a to float
+  %i = fptoui float %f to i24
+  %r = zext i24 %i to i32
+  ret i32 %r
+}
+
+; This requires converting to FP and back.
+
+define i32 @s32_f32_s25_s32(i32 %a) {
+; CHECK-LABEL: s32_f32_s25_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf s0, w0
+; CHECK-NEXT:    fcvtzs w0, s0
+; CHECK-NEXT:    ret
+  %f = sitofp i32 %a to float
+  %i = fptosi float %f to i25
+  %r = sext i25 %i to i32
+  ret i32 %r
+}
+
+define i32 @s32_f32_u25_u32(i32 %a) {
+; CHECK-LABEL: s32_f32_u25_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf s0, w0
+; CHECK-NEXT:    fcvtzs w0, s0
+; CHECK-NEXT:    ret
+  %f = sitofp i32 %a to float
+  %i = fptoui float %f to i25
+  %r = zext i25 %i to i32
+  ret i32 %r
+}
+
+; TODO: This could avoid converting to FP.
+
+define i32 @u32_f32_s25_s32(i32 %a) {
+; CHECK-LABEL: u32_f32_s25_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf s0, w0
+; CHECK-NEXT:    fcvtzs w0, s0
+; CHECK-NEXT:    ret
+  %f = uitofp i32 %a to float
+  %i = fptosi float %f to i25
+  %r = sext i25 %i to i32
+  ret i32 %r
+}
+
+define i32 @u32_f32_u25_u32(i32 %a) {
+; CHECK-LABEL: u32_f32_u25_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf s0, w0
+; CHECK-NEXT:    fcvtzs w0, s0
+; CHECK-NEXT:    ret
+  %f = uitofp i32 %a to float
+  %i = fptoui float %f to i25
+  %r = zext i25 %i to i32
+  ret i32 %r
+}
diff --git a/llvm/test/CodeGen/AArch64/neon-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-dot-product.ll
index 6c76d4e9..f71546c 100644
--- a/llvm/test/CodeGen/AArch64/neon-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/neon-dot-product.ll
@@ -5,6 +5,7 @@
 ; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=neoverse-e1  < %s | FileCheck %s
 ; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=neoverse-n1  < %s | FileCheck %s
 ; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=neoverse-n2  < %s | FileCheck %s
+; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=ampere1      < %s | FileCheck %s
 
 declare <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>)
 declare <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>)
diff --git a/llvm/test/CodeGen/AArch64/remat.ll b/llvm/test/CodeGen/AArch64/remat.ll
index bd2786f..062aa47 100644
--- a/llvm/test/CodeGen/AArch64/remat.ll
+++ b/llvm/test/CodeGen/AArch64/remat.ll
@@ -24,6 +24,7 @@
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=tsv110 -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx3t110 -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=ampere1 -o - %s | FileCheck %s
 
 %X = type { i64, i64, i64 }
 declare void @f(%X*)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
new file mode 100644
index 0000000..da11e6b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Ensure we don't crash by trying to fold fixed length frame indexes into
+; loads/stores that don't support an appropriate addressing mode, hence creating
+; too many extra vregs during frame lowering, when we don't have an emergency
+; spill slot.
+
+define dso_local void @func1(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8,
+                             i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* %v13, i64* %v14,  i64* %v15, i64* %v16,
+                             i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24,
+                             i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* %v31, i64* %v32,
+                             i64* %v33, i64* %v34, i64* %v35, i64* %v36, i64* %v37, i64* %v38, i64* %v39, i64* %v40,
+                             i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* %v46, i64* %v47, i64* %v48,
+                             i64 %v49) #0 {
+; CHECK-LABEL: func1
+  tail call void @func2(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8,
+                        i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* undef, i64* %v14, i64* %v15, i64* %v16,
+                        i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24,
+                        i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* undef, i64* undef,
+                        i64* undef, i64* undef, i64* undef, i64* undef, i64* %v37, i64* %v38, i64* %v39, i64* %v40,
+                        i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* undef, i64* %v47, i64* %v48,
+                        i64 undef)
+  ret void
+}
+
+declare dso_local void @func2(i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64)
+
+attributes #0 = { "target-features"="+sve" vscale_range(2,2) }
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
new file mode 100644
index 0000000..9227c4c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -debug-only=isel < %s 2>&1 | FileCheck %s
+
+; REQUIRES: asserts
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Ensure that only no offset frame indexes are folded into SVE load/stores when
+; accessing fixed width objects.
+define void @foo(<8 x i64>* %a) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       SelectionDAG has 14 nodes:
+; CHECK-NEXT:    t0: ch = EntryToken
+; CHECK-NEXT:    t12: nxv2i1 = PTRUE_D TargetConstant:i32<31>
+; CHECK-NEXT:    t2: i64,ch = CopyFromReg t0, Register:i64 %0
+; CHECK-NEXT:    t18: nxv2i64,ch = LD1D_IMM<Mem:(volatile load (s512) from %ir.a)> t12, t2, TargetConstant:i64<0>, t0
+; CHECK-NEXT:    t8: i64 = ADDXri TargetFrameIndex:i64<1>, TargetConstant:i32<0>, TargetConstant:i32<0>
+; CHECK-NEXT:    t17: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r0)> t18, t12, TargetFrameIndex:i64<0>, TargetConstant:i64<0>, t0
+; CHECK-NEXT:    t16: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r1)> t18, t12, t8, TargetConstant:i64<0>, t17
+; CHECK-NEXT:    t10: ch = RET_ReallyLR t16
+; CHECK-EMPTY:
+entry:
+  %r0 = alloca <8 x i64>
+  %r1 = alloca <8 x i64>
+  %r = load volatile <8 x i64>, <8 x i64>* %a
+  store volatile <8 x i64> %r, <8 x i64>* %r0
+  store volatile <8 x i64> %r, <8 x i64>* %r1
+  ret void
+}
+
+attributes #0 = { nounwind "target-features"="+sve" vscale_range(4,4) }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
index 2a270a41..d41e8f8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
@@ -2,7 +2,7 @@
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
 
 ---
 name: test_mul_s32
@@ -25,6 +25,11 @@ body: |
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
     ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](s32)
+    ; GFX10-LABEL: name: test_mul_s32
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[MUL]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = G_MUL %0, %1
@@ -64,6 +69,15 @@ body: |
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-LABEL: name: test_mul_v2s32
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>) = G_MUL %0, %1
@@ -115,6 +129,19 @@ body: |
     ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
     ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX10-LABEL: name: test_mul_s64
+    ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = G_MUL %0, %1
@@ -202,6 +229,31 @@ body: |
     ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32)
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX10-LABEL: name: test_mul_v2s64
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<2 x s64>) = G_MUL %0, %1
@@ -237,6 +289,14 @@ body: |
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC1]]
     ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[MUL]](s16)
     ; GFX9-NEXT: $vgpr0 = COPY [[ZEXT]](s32)
+    ; GFX10-LABEL: name: test_mul_s16
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC1]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[MUL]](s16)
+    ; GFX10-NEXT: $vgpr0 = COPY [[ZEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -294,6 +354,11 @@ body: |
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[COPY]], [[COPY1]]
     ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](<2 x s16>)
+    ; GFX10-LABEL: name: test_mul_v2s16
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[COPY]], [[COPY1]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[MUL]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<2 x s16>) = G_MUL %0, %1
@@ -359,6 +424,29 @@ body: |
     ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[MUL1]](<2 x s16>)
     ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
     ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16)
+    ; GFX10-LABEL: name: test_mul_v3s16
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+    ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY6]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF]](s32)
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[MUL]](<2 x s16>)
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[MUL1]](<2 x s16>)
+    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+    ; GFX10-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -462,6 +550,15 @@ body: |
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[UV1]], [[UV3]]
     ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[MUL]](<2 x s16>), [[MUL1]](<2 x s16>)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX10-LABEL: name: test_mul_v4s16
+    ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[UV]], [[UV2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[UV1]], [[UV3]]
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[MUL]](<2 x s16>), [[MUL1]](<2 x s16>)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s16>) = G_MUL %0, %1
@@ -489,6 +586,11 @@ body: |
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
     ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](s32)
+    ; GFX10-LABEL: name: test_mul_s24
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[MUL]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s24) = G_TRUNC %0
@@ -542,6 +644,19 @@ body: |
     ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
     ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX10-LABEL: name: test_mul_s33
+    ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s33) = G_TRUNC %0
@@ -635,6 +750,32 @@ body: |
     ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]]
     ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
+    ; GFX10-LABEL: name: test_mul_s96
+    ; GFX10: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
+    ; GFX10-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96)
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]]
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(s96) = G_MUL %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
index 69f2583..6405bf3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
@@ -2,7 +2,7 @@
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer  -o - %s | FileCheck -check-prefix=GFX8 %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer  -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer  -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer  -o - %s | FileCheck -check-prefix=GFX10 %s
 
 ---
 name: test_sdiv_s32
@@ -118,6 +118,42 @@ body: |
     ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ; GFX10-LABEL: name: test_sdiv_s32
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = G_SDIV %0, %1
@@ -331,6 +367,73 @@ body: |
     ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-LABEL: name: test_sdiv_v2s32
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+    ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]]
+    ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]]
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]]
+    ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]]
+    ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]]
+    ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]]
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]]
+    ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]]
+    ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
+    ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]]
+    ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>) = G_SDIV %0, %1
@@ -853,6 +956,176 @@ body: |
     ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]]
     ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ; GFX10-LABEL: name: test_sdiv_s64
+    ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
+    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
+    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
+    ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32)
+    ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]]
+    ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]]
+    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]]
+    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]]
+    ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]]
+    ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = G_SDIV %0, %1
@@ -1858,6 +2131,337 @@ body: |
     ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32)
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX10-LABEL: name: test_sdiv_v2s64
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32)
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]]
+    ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
+    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
+    ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
+    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
+    ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32)
+    ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]]
+    ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
+    ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]]
+    ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]]
+    ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]]
+    ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]]
+    ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]]
+    ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV27]], [[UADDO39]]
+    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
+    ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
+    ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV29]], [[UADDO41]]
+    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]]
+    ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO7]]
+    ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32)
+    ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
+    ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]]
+    ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32)
+    ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
+    ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]]
+    ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32)
+    ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
+    ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
+    ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32)
+    ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32)
+    ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]]
+    ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
+    ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
+    ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
+    ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]]
+    ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+    ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
+    ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]]
+    ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO9]]
+    ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]]
+    ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]]
+    ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]]
+    ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]]
+    ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]]
+    ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]]
+    ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]]
+    ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
+    ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]]
+    ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]]
+    ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]]
+    ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
+    ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]]
+    ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]]
+    ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]]
+    ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]]
+    ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]]
+    ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO56]]
+    ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]]
+    ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]]
+    ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO56]]
+    ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]]
+    ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL24]]
+    ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]]
+    ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]]
+    ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]]
+    ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
+    ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]]
+    ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL24]]
+    ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]]
+    ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]]
+    ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
+    ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]]
+    ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]]
+    ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]]
+    ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]]
+    ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]]
+    ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]]
+    ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE18]]
+    ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]]
+    ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
+    ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]]
+    ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
+    ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE18]]
+    ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]]
+    ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE18]]
+    ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
+    ; GFX10-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]]
+    ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1)
+    ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX10-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]]
+    ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1)
+    ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]]
+    ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE18]]
+    ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]]
+    ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32)
+    ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]]
+    ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]]
+    ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD33]]
+    ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]]
+    ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]]
+    ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]]
+    ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD35]], [[USUBO11]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD35]]
+    ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV55]]
+    ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV54]]
+    ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV55]]
+    ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV54]]
+    ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]]
+    ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]]
+    ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX10-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV57]], [[UADDO81]]
+    ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32)
+    ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]]
+    ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]]
+    ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]]
+    ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX10-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV59]], [[UADDO83]]
+    ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32)
+    ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]]
+    ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]]
+    ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]]
+    ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]]
+    ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64)
+    ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64)
+    ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]]
+    ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO15]]
+    ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<2 x s64>) = G_SDIV %0, %1
@@ -1984,6 +2588,44 @@ body: |
     ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ; GFX10-LABEL: name: test_sdiv_s16
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16
+    ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -2231,6 +2873,80 @@ body: |
     ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
     ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB3]](s32), [[SUB7]](s32)
     ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX10-LABEL: name: test_sdiv_v2s16
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
+    ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX10-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
+    ; GFX10-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16
+    ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+    ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]]
+    ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]]
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]]
+    ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]]
+    ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]]
+    ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]]
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]]
+    ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]]
+    ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
+    ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]]
+    ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB3]](s32), [[SUB7]](s32)
+    ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<2 x s16>) = G_SDIV %0, %1
@@ -2357,6 +3073,44 @@ body: |
     ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ; GFX10-LABEL: name: test_sdiv_s7
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7
+    ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s7) = G_TRUNC %0
@@ -2486,6 +3240,44 @@ body: |
     ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ; GFX10-LABEL: name: test_sdiv_s17
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17
+    ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s17) = G_TRUNC %0
@@ -3017,6 +3809,178 @@ body: |
     ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]]
     ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ; GFX10-LABEL: name: test_sdiv_s33
+    ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33
+    ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64)
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
+    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
+    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
+    ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32)
+    ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]]
+    ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]]
+    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]]
+    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]]
+    ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]]
+    ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s33) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
index d8bfabd..ba71db5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
@@ -2,7 +2,7 @@
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
 
 ---
 name: test_srem_s32
@@ -106,6 +106,38 @@ body: |
     ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
     ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32)
+    ; GFX10-LABEL: name: test_srem_s32
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
+    ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = G_SREM %0, %1
@@ -298,6 +330,66 @@ body: |
     ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-LABEL: name: test_srem_v2s32
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
+    ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+    ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]]
+    ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]]
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]]
+    ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]]
+    ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]]
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]]
+    ; GFX10-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]]
+    ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]]
+    ; GFX10-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>) = G_SREM %0, %1
@@ -805,6 +897,171 @@ body: |
     ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]]
     ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ; GFX10-LABEL: name: test_srem_s64
+    ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
+    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
+    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
+    ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]]
+    ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]]
+    ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = G_SREM %0, %1
@@ -1783,6 +2040,328 @@ body: |
     ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32)
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX10-LABEL: name: test_srem_v2s64
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32)
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]]
+    ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
+    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
+    ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
+    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
+    ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]]
+    ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
+    ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]]
+    ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]]
+    ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]]
+    ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]]
+    ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
+    ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
+    ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]]
+    ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]]
+    ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]]
+    ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]]
+    ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
+    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]]
+    ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
+    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]]
+    ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
+    ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
+    ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32)
+    ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32)
+    ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]]
+    ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
+    ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
+    ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
+    ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]]
+    ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+    ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
+    ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]]
+    ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]]
+    ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]]
+    ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]]
+    ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]]
+    ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]]
+    ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]]
+    ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]]
+    ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
+    ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]]
+    ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
+    ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]]
+    ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]]
+    ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]]
+    ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
+    ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]]
+    ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]]
+    ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]]
+    ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]]
+    ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]]
+    ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]]
+    ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]]
+    ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]]
+    ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]]
+    ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]]
+    ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL24]]
+    ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]]
+    ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]]
+    ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]]
+    ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
+    ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]]
+    ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL24]]
+    ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]]
+    ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]]
+    ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
+    ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]]
+    ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]]
+    ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]]
+    ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]]
+    ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]]
+    ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]]
+    ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE14]]
+    ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]]
+    ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]]
+    ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
+    ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE14]]
+    ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]]
+    ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE14]]
+    ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
+    ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]]
+    ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
+    ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]]
+    ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
+    ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]]
+    ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE14]]
+    ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]]
+    ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]]
+    ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]]
+    ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD33]]
+    ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]]
+    ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]]
+    ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]]
+    ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD35]], [[USUBO13]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD35]]
+    ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32)
+    ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]]
+    ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]]
+    ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV51]]
+    ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]]
+    ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]]
+    ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]]
+    ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32)
+    ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]]
+    ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]]
+    ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]]
+    ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX10-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]]
+    ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]]
+    ; GFX10-NEXT: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]]
+    ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32)
+    ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]]
+    ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]]
+    ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]]
+    ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
+    ; GFX10-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]]
+    ; GFX10-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]]
+    ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<2 x s64>) = G_SREM %0, %1
@@ -1903,6 +2482,42 @@ body: |
     ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]]
     ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
+    ; GFX10-LABEL: name: test_srem_s16
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16
+    ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
+    ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -2129,6 +2744,73 @@ body: |
     ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
     ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB4]](s32), [[SUB9]](s32)
     ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX10-LABEL: name: test_srem_v2s16
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
+    ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
+    ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX10-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
+    ; GFX10-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16
+    ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+    ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]]
+    ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]]
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]]
+    ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]]
+    ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]]
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]]
+    ; GFX10-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]]
+    ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]]
+    ; GFX10-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB4]](s32), [[SUB9]](s32)
+    ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<2 x s16>) = G_SREM %0, %1
@@ -2243,6 +2925,40 @@ body: |
     ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
     ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32)
+    ; GFX10-LABEL: name: test_srem_s7
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7
+    ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
+    ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s7) = G_TRUNC %0
@@ -2360,6 +3076,40 @@ body: |
     ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
     ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32)
+    ; GFX10-LABEL: name: test_srem_s17
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17
+    ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
+    ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s17) = G_TRUNC %0
@@ -2876,6 +3626,173 @@ body: |
     ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]]
     ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ; GFX10-LABEL: name: test_srem_s33
+    ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33
+    ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64)
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
+    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
+    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
+    ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]]
+    ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]]
+    ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s33) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
index 46ea14df..d17c2bf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
@@ -2,7 +2,7 @@
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
 
 ---
 name: test_udiv_s32
@@ -88,6 +88,32 @@ body: |
     ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]]
     ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
+    ; GFX10-LABEL: name: test_udiv_s32
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = G_UDIV %0, %1
@@ -244,6 +270,54 @@ body: |
     ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-LABEL: name: test_udiv_v2s32
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]]
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]]
+    ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]]
+    ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]]
+    ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]]
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]]
+    ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>) = G_UDIV %0, %1
@@ -700,6 +774,154 @@ body: |
     ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
     ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+    ; GFX10-LABEL: name: test_udiv_s64
+    ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
+    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
+    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
+    ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32)
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]]
+    ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = G_UDIV %0, %1
@@ -1576,6 +1798,294 @@ body: |
     ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX10-LABEL: name: test_udiv_v2s64
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32)
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]]
+    ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
+    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
+    ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
+    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
+    ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32)
+    ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]]
+    ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
+    ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]]
+    ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]]
+    ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]]
+    ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]]
+    ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]]
+    ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV19]], [[UADDO35]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]]
+    ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]]
+    ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV21]], [[UADDO37]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32)
+    ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32)
+    ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]]
+    ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
+    ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]]
+    ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
+    ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]]
+    ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+    ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
+    ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]]
+    ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO7]]
+    ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI2]]
+    ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]]
+    ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]]
+    ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[FPTOUI2]]
+    ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]]
+    ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]]
+    ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
+    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]]
+    ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
+    ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]]
+    ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]]
+    ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
+    ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]]
+    ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
+    ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]]
+    ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]]
+    ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]]
+    ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]]
+    ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]]
+    ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO48]]
+    ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]]
+    ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]]
+    ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO48]]
+    ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]]
+    ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[MUL24]]
+    ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]]
+    ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]]
+    ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]]
+    ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
+    ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]]
+    ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[MUL24]]
+    ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]]
+    ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]]
+    ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
+    ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]]
+    ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]]
+    ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]]
+    ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]]
+    ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]]
+    ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]]
+    ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE10]]
+    ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]]
+    ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]]
+    ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
+    ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE10]]
+    ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]]
+    ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE10]]
+    ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]]
+    ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
+    ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]]
+    ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
+    ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]]
+    ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE10]]
+    ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]]
+    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32)
+    ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]]
+    ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]]
+    ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD33]]
+    ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]]
+    ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]]
+    ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]]
+    ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD35]], [[USUBO9]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD35]]
+    ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV35]]
+    ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV34]]
+    ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV35]]
+    ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV34]]
+    ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO9]]
+    ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]]
+    ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV37]], [[UADDO73]]
+    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32)
+    ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]]
+    ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]]
+    ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]]
+    ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV39]], [[UADDO75]]
+    ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32)
+    ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]]
+    ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]]
+    ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]]
+    ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<2 x s64>) = G_UDIV %0, %1
@@ -1678,6 +2188,36 @@ body: |
     ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
     ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]]
     ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32)
+    ; GFX10-LABEL: name: test_udiv_s16
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[AND2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -1869,6 +2409,62 @@ body: |
     ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
     ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT2]](s32), [[SELECT5]](s32)
     ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX10-LABEL: name: test_udiv_v2s16
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]]
+    ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]]
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]]
+    ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]]
+    ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]]
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]]
+    ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT2]](s32), [[SELECT5]](s32)
+    ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<2 x s16>) = G_UDIV %0, %1
@@ -1968,6 +2564,35 @@ body: |
     ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
     ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
+    ; GFX10-LABEL: name: test_udiv_s7
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s7) = G_TRUNC %0
@@ -2070,6 +2695,35 @@ body: |
     ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
     ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
+    ; GFX10-LABEL: name: test_udiv_s17
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s17) = G_TRUNC %0
@@ -2538,6 +3192,157 @@ body: |
     ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
     ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+    ; GFX10-LABEL: name: test_udiv_s33
+    ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
+    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
+    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
+    ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32)
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]]
+    ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s33) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir
index 47077c7..20bfb0b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir
@@ -10,24 +10,24 @@ body: |
 
     ; GFX8-LABEL: name: test_umulo_s32
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]]
-    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
-    ; GFX8: $vgpr0 = COPY [[MUL]](s32)
-    ; GFX8: $vgpr1 = COPY [[ZEXT]](s32)
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]]
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
+    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
+    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[MUL]](s32)
+    ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
     ; GFX9-LABEL: name: test_umulo_s32
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]]
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
-    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
-    ; GFX9: $vgpr0 = COPY [[MUL]](s32)
-    ; GFX9: $vgpr1 = COPY [[ZEXT]](s32)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]]
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
+    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](s32)
+    ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32), %3:_(s1) = G_UMULO %0, %1
@@ -44,46 +44,46 @@ body: |
 
     ; GFX8-LABEL: name: test_umulo_v2s32
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
-    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
-    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
-    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
-    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]]
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32)
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
-    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
-    ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
-    ; GFX8: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>)
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
+    ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
+    ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
+    ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]]
+    ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32)
+    ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
+    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
+    ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>)
     ; GFX9-LABEL: name: test_umulo_v2s32
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
-    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
-    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
-    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
-    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
-    ; GFX9: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
+    ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
+    ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
+    ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>), %3:_(<2 x s1>) = G_UMULO %0, %1
@@ -100,86 +100,86 @@ body: |
 
     ; GFX8-LABEL: name: test_umulo_s64
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
-    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
-    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
-    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
-    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
-    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]]
-    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]]
-    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
-    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
-    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
-    ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]]
-    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
-    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
-    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
-    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
-    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]]
-    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32)
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]]
-    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[MV1]](s64)
-    ; GFX8: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64)
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+    ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+    ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+    ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
+    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
+    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
+    ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]]
+    ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]]
+    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
+    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
+    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
+    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
+    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
+    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
+    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]]
+    ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32)
+    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]]
+    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64)
+    ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64)
     ; GFX9-LABEL: name: test_umulo_s64
     ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
-    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
-    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
-    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
-    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
-    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]]
-    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]]
-    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
-    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
-    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]]
-    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
-    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
-    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
-    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
-    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]]
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32)
-    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]]
-    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV1]](s64)
-    ; GFX9: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+    ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+    ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+    ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
+    ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]]
+    ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]]
+    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
+    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
+    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
+    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
+    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32)
+    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]]
+    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64)
+    ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64), %3:_(s1) = G_UMULO %0, %1
@@ -196,170 +196,170 @@ body: |
 
     ; GFX8-LABEL: name: test_umulo_v2s64
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
-    ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
-    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
-    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
-    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
-    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]]
-    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]]
-    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]]
-    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
-    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]]
-    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
-    ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]]
-    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]]
-    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]]
-    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]]
-    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
-    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]]
-    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32)
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]]
-    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]]
-    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]]
-    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]]
-    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]]
-    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]]
-    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]]
-    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]]
-    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]]
-    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]]
-    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]]
-    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]]
-    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]]
-    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]]
-    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32)
-    ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]]
-    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]]
-    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]]
-    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]]
-    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]]
-    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]]
-    ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32)
-    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]]
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64)
-    ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
-    ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
-    ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
-    ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]]
-    ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64)
-    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
-    ; GFX8: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>)
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
+    ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
+    ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
+    ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
+    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
+    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]]
+    ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]]
+    ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]]
+    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
+    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
+    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]]
+    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
+    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]]
+    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
+    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]]
+    ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32)
+    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]]
+    ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]]
+    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]]
+    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]]
+    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]]
+    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]]
+    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]]
+    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32)
+    ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]]
+    ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]]
+    ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]]
+    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]]
+    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]]
+    ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32)
+    ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]]
+    ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64)
+    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
+    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
+    ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
+    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]]
+    ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64)
+    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX8-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>)
     ; GFX9-LABEL: name: test_umulo_v2s64
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
-    ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
-    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
-    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
-    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
-    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]]
-    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]]
-    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]]
-    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
-    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]]
-    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]]
-    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]]
-    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]]
-    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]]
-    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
-    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]]
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32)
-    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]]
-    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]]
-    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]]
-    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]]
-    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]]
-    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]]
-    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]]
-    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]]
-    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]]
-    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]]
-    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]]
-    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]]
-    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]]
-    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]]
-    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32)
-    ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]]
-    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]]
-    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]]
-    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]]
-    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]]
-    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]]
-    ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32)
-    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
-    ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
-    ; GFX9: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
+    ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
+    ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
+    ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]]
+    ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]]
+    ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]]
+    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
+    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]]
+    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]]
+    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
+    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32)
+    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]]
+    ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]]
+    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]]
+    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]]
+    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]]
+    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]]
+    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]]
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32)
+    ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]]
+    ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]]
+    ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]]
+    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]]
+    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]]
+    ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32)
+    ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]]
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<2 x s64>), %3:_(<2 x s1>) = G_UMULO %0, %1
@@ -376,38 +376,38 @@ body: |
 
     ; GFX8-LABEL: name: test_umulo_s24
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
-    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]]
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]]
-    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
-    ; GFX8: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]]
-    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1)
-    ; GFX8: $vgpr0 = COPY [[AND3]](s32)
-    ; GFX8: $vgpr1 = COPY [[ZEXT]](s32)
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]]
+    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
+    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]]
+    ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
+    ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]]
+    ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32)
+    ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
     ; GFX9-LABEL: name: test_umulo_s24
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]]
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
-    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
-    ; GFX9: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1)
-    ; GFX9: $vgpr0 = COPY [[AND3]](s32)
-    ; GFX9: $vgpr1 = COPY [[ZEXT]](s32)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]]
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
+    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](s32)
+    ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s24) = G_TRUNC %0
@@ -428,30 +428,30 @@ body: |
 
     ; GFX8-LABEL: name: test_umulo_s16
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
-    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
-    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
-    ; GFX8: $vgpr0 = COPY [[AND3]](s32)
-    ; GFX8: $vgpr1 = COPY [[ZEXT]](s32)
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
+    ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
+    ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32)
+    ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
     ; GFX9-LABEL: name: test_umulo_s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
-    ; GFX9: $vgpr0 = COPY [[AND3]](s32)
-    ; GFX9: $vgpr1 = COPY [[ZEXT]](s32)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](s32)
+    ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -471,30 +471,30 @@ body: |
 
     ; GFX8-LABEL: name: test_umulo_s8
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
-    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
-    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
-    ; GFX8: $vgpr0 = COPY [[AND3]](s32)
-    ; GFX8: $vgpr1 = COPY [[ZEXT]](s32)
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
+    ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
+    ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32)
+    ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
     ; GFX9-LABEL: name: test_umulo_s8
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
-    ; GFX9: $vgpr0 = COPY [[AND3]](s32)
-    ; GFX9: $vgpr1 = COPY [[ZEXT]](s32)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](s32)
+    ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s8) = G_TRUNC %0
@@ -513,70 +513,70 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX8-LABEL: name: test_umulo_v2s16
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
-    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
-    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
-    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
-    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
-    ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
-    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
-    ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
-    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]]
-    ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
-    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]]
-    ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX8: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
-    ; GFX8: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32)
-    ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
-    ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; GFX8: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
-    ; GFX8: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]]
-    ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND10]](s32), [[AND11]](s32)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>)
-    ; GFX8: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
+    ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
+    ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
+    ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
+    ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
+    ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
+    ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
+    ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]]
+    ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
+    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
+    ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]]
+    ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
+    ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
+    ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32)
+    ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
+    ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
+    ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]]
+    ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND10]](s32), [[AND11]](s32)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>)
+    ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_umulo_v2s16
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
-    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
-    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
-    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]]
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[MUL]](s32), [[MUL1]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>)
-    ; GFX9: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
+    ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
+    ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
+    ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]]
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[MUL]](s32), [[MUL1]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]]
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>)
+    ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s16>) = G_TRUNC %0
@@ -596,64 +596,64 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; GFX8-LABEL: name: test_umulo_v2s8
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
-    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
-    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
-    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
-    ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
-    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]]
-    ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32)
-    ; GFX8: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32)
-    ; GFX8: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]]
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
-    ; GFX8: $vgpr1 = COPY [[ANYEXT1]](s32)
-    ; GFX8: $vgpr2 = COPY [[ANYEXT2]](s32)
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
+    ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
+    ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
+    ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
+    ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]]
+    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32)
+    ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32)
+    ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]]
+    ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32)
+    ; GFX8-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32)
     ; GFX9-LABEL: name: test_umulo_v2s8
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
-    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
-    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
-    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]]
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
-    ; GFX9: $vgpr1 = COPY [[ANYEXT1]](s32)
-    ; GFX9: $vgpr2 = COPY [[ANYEXT2]](s32)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
+    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]]
+    ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]]
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32)
+    ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -684,84 +684,84 @@ body: |
     liveins: $vgpr0, $vgpr1
     ; GFX8-LABEL: name: test_umulo_v4s8
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
-    ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
-    ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
-    ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
-    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
-    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
-    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
-    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]]
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
-    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
-    ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]]
-    ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX8: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]]
-    ; GFX8: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]]
-    ; GFX8: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]]
-    ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32)
-    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]]
-    ; GFX8: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]]
-    ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
-    ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX8: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]]
-    ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32)
-    ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; GFX8: $vgpr0 = COPY [[OR2]](s32)
-    ; GFX8: $vgpr1 = COPY [[ANYEXT]](s32)
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+    ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
+    ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
+    ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
+    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
+    ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]]
+    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
+    ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
+    ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]]
+    ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]]
+    ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]]
+    ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]]
+    ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32)
+    ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]]
+    ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]]
+    ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
+    ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX8-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]]
+    ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32)
+    ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_umulo_v4s8
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
-    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]]
-    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
-    ; GFX9: $vgpr1 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]]
+    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir
index 934f5b5..cf63e5398c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir
@@ -2,7 +2,7 @@
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
 
 ---
 name: test_urem_s32
@@ -79,6 +79,29 @@ body: |
     ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]]
     ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32)
+    ; GFX10-LABEL: name: test_urem_s32
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = G_UREM %0, %1
@@ -220,6 +243,49 @@ body: |
     ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]]
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-LABEL: name: test_urem_v2s32
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]]
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]]
+    ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]]
+    ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]]
+    ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>) = G_UREM %0, %1
@@ -664,6 +730,150 @@ body: |
     ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
     ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+    ; GFX10-LABEL: name: test_urem_s64
+    ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
+    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
+    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
+    ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = G_UREM %0, %1
@@ -1519,6 +1729,287 @@ body: |
     ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX10-LABEL: name: test_urem_v2s64
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32)
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]]
+    ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
+    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
+    ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
+    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
+    ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]]
+    ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
+    ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]]
+    ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]]
+    ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]]
+    ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]]
+    ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]]
+    ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]]
+    ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]]
+    ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]]
+    ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32)
+    ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32)
+    ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]]
+    ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
+    ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]]
+    ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
+    ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]]
+    ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+    ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
+    ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]]
+    ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]]
+    ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]]
+    ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]]
+    ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]]
+    ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]]
+    ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]]
+    ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]]
+    ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]]
+    ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]]
+    ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]]
+    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
+    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]]
+    ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
+    ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]]
+    ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
+    ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]]
+    ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]]
+    ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]]
+    ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]]
+    ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]]
+    ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]]
+    ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]]
+    ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]]
+    ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]]
+    ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL24]]
+    ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]]
+    ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]]
+    ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]]
+    ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
+    ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]]
+    ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL24]]
+    ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]]
+    ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]]
+    ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
+    ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]]
+    ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]]
+    ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]]
+    ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]]
+    ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]]
+    ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]]
+    ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]]
+    ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]]
+    ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
+    ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE6]]
+    ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]]
+    ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE6]]
+    ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]]
+    ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
+    ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]]
+    ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]]
+    ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE6]]
+    ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]]
+    ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]]
+    ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]]
+    ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD33]]
+    ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]]
+    ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]]
+    ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]]
+    ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD35]], [[USUBO11]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD35]]
+    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32)
+    ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]]
+    ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]]
+    ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV31]]
+    ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]]
+    ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]]
+    ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]]
+    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32)
+    ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]]
+    ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]]
+    ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]]
+    ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]]
+    ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]]
+    ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]]
+    ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32)
+    ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]]
+    ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]]
+    ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]]
+    ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<2 x s64>) = G_UREM %0, %1
@@ -1612,6 +2103,33 @@ body: |
     ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
     ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]]
     ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32)
+    ; GFX10-LABEL: name: test_urem_s16
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[AND2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -1788,6 +2306,57 @@ body: |
     ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]]
     ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT1]](s32), [[SELECT3]](s32)
     ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX10-LABEL: name: test_urem_v2s16
+    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]]
+    ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]]
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]]
+    ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]]
+    ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]]
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT1]](s32), [[SELECT3]](s32)
+    ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<2 x s16>) = G_UREM %0, %1
@@ -1878,6 +2447,32 @@ body: |
     ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
     ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32)
+    ; GFX10-LABEL: name: test_urem_s7
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s7) = G_TRUNC %0
@@ -1971,6 +2566,32 @@ body: |
     ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
     ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32)
+    ; GFX10-LABEL: name: test_urem_s17
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+    ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+    ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s17) = G_TRUNC %0
@@ -2427,6 +3048,153 @@ body: |
     ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
     ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+    ; GFX10-LABEL: name: test_urem_s33
+    ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
+    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
+    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
+    ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s33) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index a4eb3a4..129a2c2 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -1748,46 +1748,35 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(<4 x half> addrspa
 define amdgpu_kernel void @v_insertelement_v8f16_3(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in, i32 %val) {
 ; GFX9-LABEL: v_insertelement_v8f16_3:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x10
-; GFX9-NEXT:    s_add_u32 s0, s0, s7
-; GFX9-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[10:11]
-; GFX9-NEXT:    v_mov_b32_e32 v5, s6
-; GFX9-NEXT:    buffer_store_short v5, off, s[0:3], 0 offset:16
-; GFX9-NEXT:    buffer_load_dword v5, off, s[0:3], 0 offset:16
-; GFX9-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_lshl_or_b32 v1, v5, 16, v1
-; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[8:9]
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v1, s6, 16, v1
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; VI-LABEL: v_insertelement_v8f16_3:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x10
 ; VI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
-; VI-NEXT:    s_add_u32 s0, s0, s7
-; VI-NEXT:    s_addc_u32 s1, s1, 0
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s11
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s10, v4
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    v_mov_b32_e32 v5, s4
 ; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
-; VI-NEXT:    buffer_store_short v5, off, s[0:3], 0 offset:16
-; VI-NEXT:    buffer_load_dword v6, off, s[0:3], 0 offset:16
-; VI-NEXT:    v_mov_b32_e32 v5, s9
-; VI-NEXT:    v_add_u32_e32 v4, vcc, s8, v4
-; VI-NEXT:    s_mov_b32 s4, 0xffff
+; VI-NEXT:    v_mov_b32_e32 v5, s1
+; VI-NEXT:    s_lshl_b32 s1, s4, 16
+; VI-NEXT:    s_mov_b32 s2, 0xffff
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s0, v4
+; VI-NEXT:    v_mov_b32_e32 v6, s1
 ; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; VI-NEXT:    s_waitcnt vmcnt(2)
-; VI-NEXT:    v_bfi_b32 v3, s4, v3, v3
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
+; VI-NEXT:    v_bfi_b32 v3, s2, v3, v3
 ; VI-NEXT:    v_or_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
@@ -1826,45 +1815,35 @@ define amdgpu_kernel void @v_insertelement_v8f16_3(<8 x half> addrspace(1)* %out
 define amdgpu_kernel void @v_insertelement_v8i16_6(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in, i32 %val) {
 ; GFX9-LABEL: v_insertelement_v8i16_6:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x10
-; GFX9-NEXT:    s_add_u32 s0, s0, s7
-; GFX9-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[10:11]
-; GFX9-NEXT:    v_mov_b32_e32 v5, s6
-; GFX9-NEXT:    buffer_store_short v5, off, s[0:3], 0 offset:16
-; GFX9-NEXT:    buffer_load_dword v5, off, s[0:3], 0 offset:16
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
+; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_bfi_b32 v3, v6, v5, v3
-; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[8:9]
+; GFX9-NEXT:    v_bfi_b32 v3, v5, s6, v3
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; VI-LABEL: v_insertelement_v8i16_6:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x10
 ; VI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
-; VI-NEXT:    s_add_u32 s0, s0, s7
-; VI-NEXT:    s_addc_u32 s1, s1, 0
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s11
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s10, v4
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    v_mov_b32_e32 v5, s4
 ; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
-; VI-NEXT:    buffer_store_short v5, off, s[0:3], 0 offset:16
-; VI-NEXT:    buffer_load_dword v6, off, s[0:3], 0 offset:16
-; VI-NEXT:    s_mov_b32 s4, 0xffff
-; VI-NEXT:    v_mov_b32_e32 v5, s9
-; VI-NEXT:    v_add_u32_e32 v4, vcc, s8, v4
+; VI-NEXT:    s_mov_b32 s2, 0xffff
+; VI-NEXT:    v_mov_b32_e32 v5, s1
+; VI-NEXT:    v_mov_b32_e32 v6, s4
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s0, v4
 ; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; VI-NEXT:    s_waitcnt vmcnt(2)
-; VI-NEXT:    v_bfi_b32 v1, s4, v1, v1
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_bfi_b32 v3, s4, v6, v3
+; VI-NEXT:    v_bfi_b32 v3, s2, v6, v3
+; VI-NEXT:    v_bfi_b32 v1, s2, v1, v1
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll
new file mode 100644
index 0000000..d4ca078
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX906 %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefixes=GFX908 %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A %s
+
+define amdgpu_kernel void @scalar_to_vector_v8i16(<2 x i32> %in, <8 x i16>* %out) #0 {
+; GFX900-LABEL: scalar_to_vector_v8i16:
+; GFX900:       ; %bb.0: ; %entry
+; GFX900-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX900-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
+; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX900-NEXT:    s_pack_lh_b32_b16 s4, s0, s0
+; GFX900-NEXT:    v_mov_b32_e32 v6, s3
+; GFX900-NEXT:    v_add_co_u32_e32 v5, vcc, s2, v0
+; GFX900-NEXT:    v_mov_b32_e32 v2, s1
+; GFX900-NEXT:    v_mov_b32_e32 v4, s0
+; GFX900-NEXT:    v_mov_b32_e32 v1, s4
+; GFX900-NEXT:    v_mov_b32_e32 v3, s4
+; GFX900-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+; GFX900-NEXT:    flat_store_dwordx4 v[5:6], v[1:4]
+; GFX900-NEXT:    s_endpgm
+;
+; GFX906-LABEL: scalar_to_vector_v8i16:
+; GFX906:       ; %bb.0: ; %entry
+; GFX906-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX906-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
+; GFX906-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX906-NEXT:    s_pack_lh_b32_b16 s4, s0, s0
+; GFX906-NEXT:    v_mov_b32_e32 v6, s3
+; GFX906-NEXT:    v_add_co_u32_e32 v5, vcc, s2, v0
+; GFX906-NEXT:    v_mov_b32_e32 v2, s1
+; GFX906-NEXT:    v_mov_b32_e32 v4, s0
+; GFX906-NEXT:    v_mov_b32_e32 v1, s4
+; GFX906-NEXT:    v_mov_b32_e32 v3, s4
+; GFX906-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+; GFX906-NEXT:    flat_store_dwordx4 v[5:6], v[1:4]
+; GFX906-NEXT:    s_endpgm
+;
+; GFX908-LABEL: scalar_to_vector_v8i16:
+; GFX908:       ; %bb.0: ; %entry
+; GFX908-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX908-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
+; GFX908-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX908-NEXT:    s_pack_lh_b32_b16 s4, s0, s0
+; GFX908-NEXT:    v_mov_b32_e32 v6, s3
+; GFX908-NEXT:    v_add_co_u32_e32 v5, vcc, s2, v0
+; GFX908-NEXT:    v_mov_b32_e32 v2, s1
+; GFX908-NEXT:    v_mov_b32_e32 v4, s0
+; GFX908-NEXT:    v_mov_b32_e32 v1, s4
+; GFX908-NEXT:    v_mov_b32_e32 v3, s4
+; GFX908-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+; GFX908-NEXT:    flat_store_dwordx4 v[5:6], v[1:4]
+; GFX908-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: scalar_to_vector_v8i16:
+; GFX90A:       ; %bb.0: ; %entry
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
+; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_pack_lh_b32_b16 s4, s0, s0
+; GFX90A-NEXT:    v_mov_b32_e32 v1, s3
+; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, s2, v0
+; GFX90A-NEXT:    v_mov_b32_e32 v3, s1
+; GFX90A-NEXT:    v_mov_b32_e32 v5, s0
+; GFX90A-NEXT:    v_mov_b32_e32 v2, s4
+; GFX90A-NEXT:    v_mov_b32_e32 v4, s4
+; GFX90A-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
+; GFX90A-NEXT:    s_endpgm
+entry:
+  %val.1.i32 = extractelement <2 x i32> %in, i64 0
+  %val.2.vec2.i16 = bitcast i32 %val.1.i32 to <2 x i16>
+  %val.3.vec8.i16 = shufflevector <2 x i16> %val.2.vec2.i16, <2 x i16> %val.2.vec2.i16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+
+  %val.4.vec4.i32 = shufflevector <2 x i32> %in, <2 x i32> %in, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %val.5.vec8.i16 = bitcast <4 x i32> %val.4.vec4.i32 to <8 x i16>
+
+  %val.6.vec8.i16 = shufflevector <8 x i16> %val.5.vec8.i16, <8 x i16> %val.3.vec8.i16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %out.gep = getelementptr inbounds <8 x i16>, <8 x i16>* %out, i64 %tid.ext
+  store <8 x i16> %val.6.vec8.i16, <8 x i16>* %out.gep, align 16
+
+  ret void
+}
+
+define amdgpu_kernel void @scalar_to_vector_v8f16(<2 x float> %in, <8 x half>* %out) #0 {
+; GFX900-LABEL: scalar_to_vector_v8f16:
+; GFX900:       ; %bb.0: ; %entry
+; GFX900-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX900-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
+; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX900-NEXT:    s_lshr_b32 s4, s0, 16
+; GFX900-NEXT:    v_mov_b32_e32 v3, s0
+; GFX900-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
+; GFX900-NEXT:    v_mov_b32_e32 v6, s3
+; GFX900-NEXT:    v_add_co_u32_e32 v5, vcc, s2, v0
+; GFX900-NEXT:    v_mov_b32_e32 v2, s1
+; GFX900-NEXT:    v_mov_b32_e32 v1, s0
+; GFX900-NEXT:    v_mov_b32_e32 v4, s0
+; GFX900-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+; GFX900-NEXT:    flat_store_dwordx4 v[5:6], v[1:4]
+; GFX900-NEXT:    s_endpgm
+;
+; GFX906-LABEL: scalar_to_vector_v8f16:
+; GFX906:       ; %bb.0: ; %entry
+; GFX906-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX906-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
+; GFX906-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX906-NEXT:    s_lshr_b32 s4, s0, 16
+; GFX906-NEXT:    v_mov_b32_e32 v3, s0
+; GFX906-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
+; GFX906-NEXT:    v_mov_b32_e32 v6, s3
+; GFX906-NEXT:    v_add_co_u32_e32 v5, vcc, s2, v0
+; GFX906-NEXT:    v_mov_b32_e32 v2, s1
+; GFX906-NEXT:    v_mov_b32_e32 v1, s0
+; GFX906-NEXT:    v_mov_b32_e32 v4, s0
+; GFX906-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+; GFX906-NEXT:    flat_store_dwordx4 v[5:6], v[1:4]
+; GFX906-NEXT:    s_endpgm
+;
+; GFX908-LABEL: scalar_to_vector_v8f16:
+; GFX908:       ; %bb.0: ; %entry
+; GFX908-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX908-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
+; GFX908-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX908-NEXT:    s_lshr_b32 s4, s0, 16
+; GFX908-NEXT:    v_mov_b32_e32 v3, s0
+; GFX908-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
+; GFX908-NEXT:    v_mov_b32_e32 v6, s3
+; GFX908-NEXT:    v_add_co_u32_e32 v5, vcc, s2, v0
+; GFX908-NEXT:    v_mov_b32_e32 v2, s1
+; GFX908-NEXT:    v_mov_b32_e32 v1, s0
+; GFX908-NEXT:    v_mov_b32_e32 v4, s0
+; GFX908-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+; GFX908-NEXT:    flat_store_dwordx4 v[5:6], v[1:4]
+; GFX908-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: scalar_to_vector_v8f16:
+; GFX90A:       ; %bb.0: ; %entry
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
+; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_lshr_b32 s4, s0, 16
+; GFX90A-NEXT:    v_mov_b32_e32 v4, s0
+; GFX90A-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
+; GFX90A-NEXT:    v_mov_b32_e32 v1, s3
+; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, s2, v0
+; GFX90A-NEXT:    v_mov_b32_e32 v3, s1
+; GFX90A-NEXT:    v_mov_b32_e32 v2, s0
+; GFX90A-NEXT:    v_mov_b32_e32 v5, s0
+; GFX90A-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
+; GFX90A-NEXT:    s_endpgm
+entry:
+  %val.1.float = extractelement <2 x float> %in, i64 0
+  %val.2.vec2.half = bitcast float %val.1.float to <2 x half>
+  %val.3.vec8.half = shufflevector <2 x half> %val.2.vec2.half, <2 x half> %val.2.vec2.half, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+
+  %val.4.vec4.float = shufflevector <2 x float> %in, <2 x float> %in, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %val.5.vec8.half = bitcast <4 x float> %val.4.vec4.float to <8 x half>
+
+  %val.6.vec8.half = shufflevector <8 x half> %val.5.vec8.half, <8 x half> %val.3.vec8.half, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %out.gep = getelementptr inbounds <8 x half>, <8 x half>* %out, i64 %tid.ext
+  store <8 x half> %val.6.vec8.half, <8 x half>* %out.gep, align 16
+
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/ARM/segmented-stacks.ll b/llvm/test/CodeGen/ARM/segmented-stacks.ll
index 10ce065..3861712 100644
--- a/llvm/test/CodeGen/ARM/segmented-stacks.ll
+++ b/llvm/test/CodeGen/ARM/segmented-stacks.ll
@@ -280,4 +280,24 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
 
 }
 
+
+declare void @panic() unnamed_addr
+
+; We used to crash while compiling the following function.
+; ARM-linux-LABEL: build_should_not_segfault:
+; ARM-android-LABEL: build_should_not_segfault:
+define void @build_should_not_segfault(i8 %x) unnamed_addr #0 {
+start:
+  %_0 = icmp ult i8 %x, 16
+  %or.cond = select i1 undef, i1 true, i1 %_0
+  br i1 %or.cond, label %bb1, label %bb2
+
+bb1:
+  ret void
+
+bb2:
+  call void @panic()
+  unreachable
+}
+
 attributes #0 = { "split-stack" }
diff --git a/llvm/test/CodeGen/PowerPC/addegluecrash.ll b/llvm/test/CodeGen/PowerPC/addegluecrash.ll
index 2338ca9..404360e 100644
--- a/llvm/test/CodeGen/PowerPC/addegluecrash.ll
+++ b/llvm/test/CodeGen/PowerPC/addegluecrash.ll
@@ -23,12 +23,11 @@ define void @bn_mul_comba8(i64* nocapture %r, i64* nocapture readonly %a, i64* n
 ; CHECK-NEXT:    addc 6, 6, 7
 ; CHECK-NEXT:    addze 5, 5
 ; CHECK-NEXT:    add 3, 5, 3
-; CHECK-NEXT:    cmpld 7, 3, 5
-; CHECK-NEXT:    mfocrf 3, 1
-; CHECK-NEXT:    rlwinm 5, 3, 29, 31, 31
-; CHECK-NEXT:    # implicit-def: $x3
-; CHECK-NEXT:    mr 3, 5
-; CHECK-NEXT:    clrldi 3, 3, 32
+; CHECK-NEXT:    cmpld 3, 5
+; CHECK-NEXT:    crmove 20, 0
+; CHECK-NEXT:    li 5, 0
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    isel 3, 3, 5, 20
 ; CHECK-NEXT:    std 3, 0(4)
 ; CHECK-NEXT:    blr
   %1 = load i64, i64* %a, align 8
diff --git a/llvm/test/CodeGen/PowerPC/atomics-constant.ll b/llvm/test/CodeGen/PowerPC/atomics-constant.ll
index 80f84d8..3a7da6f 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-constant.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-constant.ll
@@ -3,16 +3,14 @@
 
 target triple = "powerpc64le-unknown-linux-gnu"
 
-@a = dso_local constant i64 zeroinitializer
+@a = dso_local global i64 zeroinitializer
 
 define i64 @foo() {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    li 4, 0
 ; CHECK-NEXT:    addis 3, 2, a@toc@ha
 ; CHECK-NEXT:    ld 3, a@toc@l(3)
-; CHECK-NEXT:    cmpd 7, 4, 4
-; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    cmpd 7, 3, 3
 ; CHECK-NEXT:    bne- 7, .+4
 ; CHECK-NEXT:    isync
 ; CHECK-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll b/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll
index bac39c1..4156455 100644
--- a/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll
@@ -14,10 +14,6 @@ define i32 @test_choice1(fp128 %a, fp128 %b) #0 {
 ; P8-NEXT:    nop
 ; P8-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; P8-NEXT:    cmplwi 3, 0
-; P8-NEXT:    li 3, 0
-; P8-NEXT:    li 4, 1
-; P8-NEXT:    iseleq 3, 3, 4
-; P8-NEXT:    cmplwi 3, 0
 ; P8-NEXT:    bne 0, .LBB0_2
 ; P8-NEXT:    b .LBB0_1
 ; P8-NEXT:  .LBB0_1: # %if.true
@@ -88,11 +84,7 @@ define i32 @test_choice2(fp128 %a, fp128 %b) #0 {
 ; P8-NEXT:    nop
 ; P8-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; P8-NEXT:    cmpwi 3, 1
-; P8-NEXT:    li 4, 0
-; P8-NEXT:    li 3, 1
-; P8-NEXT:    isellt 3, 3, 4
-; P8-NEXT:    cmplwi 3, 0
-; P8-NEXT:    bne 0, .LBB1_2
+; P8-NEXT:    blt 0, .LBB1_2
 ; P8-NEXT:    b .LBB1_1
 ; P8-NEXT:  .LBB1_1: # %if.true
 ; P8-NEXT:    bl foo
@@ -115,37 +107,30 @@ define i32 @test_choice2(fp128 %a, fp128 %b) #0 {
 ; P9:       # %bb.0: # %entry
 ; P9-NEXT:    mflr 0
 ; P9-NEXT:    std 0, 16(1)
-; P9-NEXT:    stdu 1, -128(1)
-; P9-NEXT:    xscmpuqp 7, 2, 3
-; P9-NEXT:    mfocrf 3, 1
-; P9-NEXT:    rotlwi 3, 3, 28
-; P9-NEXT:    stw 3, 124(1)
-; P9-NEXT:    mfocrf 3, 1
-; P9-NEXT:    lwz 4, 124(1)
-; P9-NEXT:    rotlwi 4, 4, 4
-; P9-NEXT:    mtocrf 1, 4
-; P9-NEXT:    clrlwi 3, 3, 31
-; P9-NEXT:    xori 4, 3, 1
-; P9-NEXT:    mfocrf 3, 1
-; P9-NEXT:    rlwinm 3, 3, 30, 31, 31
-; P9-NEXT:    xori 3, 3, 1
-; P9-NEXT:    and 3, 3, 4
-; P9-NEXT:    cmplwi 3, 0
-; P9-NEXT:    bne 0, .LBB1_2
+; P9-NEXT:    stdu 1, -112(1)
+; P9-NEXT:    xscmpuqp 0, 2, 3
+; P9-NEXT:    crmove 20, 3
+; P9-NEXT:    crnot 21, 20
+; P9-NEXT:    crmove 20, 1
+; P9-NEXT:    crnot 20, 20
+; P9-NEXT:    crand 20, 20, 21
+; P9-NEXT:    crxor 21, 21, 21
+; P9-NEXT:    crxor 20, 20, 21
+; P9-NEXT:    bc 12, 20, .LBB1_2
 ; P9-NEXT:    b .LBB1_1
 ; P9-NEXT:  .LBB1_1: # %if.true
 ; P9-NEXT:    bl foo
 ; P9-NEXT:    nop
-; P9-NEXT:    stw 3, 120(1) # 4-byte Folded Spill
+; P9-NEXT:    stw 3, 108(1) # 4-byte Folded Spill
 ; P9-NEXT:    b .LBB1_3
 ; P9-NEXT:  .LBB1_2: # %if.false
 ; P9-NEXT:    bl bar
 ; P9-NEXT:    nop
-; P9-NEXT:    stw 3, 120(1) # 4-byte Folded Spill
+; P9-NEXT:    stw 3, 108(1) # 4-byte Folded Spill
 ; P9-NEXT:  .LBB1_3: # %final
-; P9-NEXT:    lwz 3, 120(1) # 4-byte Folded Reload
+; P9-NEXT:    lwz 3, 108(1) # 4-byte Folded Reload
 ; P9-NEXT:    clrldi 3, 3, 32
-; P9-NEXT:    addi 1, 1, 128
+; P9-NEXT:    addi 1, 1, 112
 ; P9-NEXT:    ld 0, 16(1)
 ; P9-NEXT:    mtlr 0
 ; P9-NEXT:    blr
@@ -175,10 +160,8 @@ define i32 @test_choice3(fp128 %a, fp128 %b) #0 {
 ; P8-NEXT:    bl __ltkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    # kill: def $r3 killed $r3 killed $x3
-; P8-NEXT:    not 3, 3
-; P8-NEXT:    srwi 3, 3, 31
-; P8-NEXT:    cmplwi 3, 0
-; P8-NEXT:    bne 0, .LBB2_2
+; P8-NEXT:    cmpwi 3, -1
+; P8-NEXT:    bgt 0, .LBB2_2
 ; P8-NEXT:    b .LBB2_1
 ; P8-NEXT:  .LBB2_1: # %if.true
 ; P8-NEXT:    bl foo
diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll b/llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll
index 84d4614..045b67b 100644
--- a/llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll
+++ b/llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll
@@ -2,7 +2,7 @@
 
 define i1 @TestULT(double %t0) {
 ; CHECK-LABEL: TestULT:
-; CHECK: xscmpudp
+; CHECK: fcmpu
 ; CHECK: blr
 entry:
   %t1 = fcmp ult double %t0, 0.000000e+00
@@ -49,7 +49,7 @@ good:
 
 define i1 @TestUEQ(double %t0) {
 ; CHECK-LABEL: TestUEQ:
-; CHECK: xscmpudp
+; CHECK: fcmpu
 ; CHECK: blr
 entry:
   %t1 = fcmp ueq double %t0, 0.000000e+00
@@ -64,7 +64,7 @@ good:
 
 define i1 @TestUGT(double %t0) {
 ; CHECK-LABEL: TestUGT:
-; CHECK: xscmpudp
+; CHECK: fcmpu
 ; CHECK: blr
 entry:
   %t1 = fcmp ugt double %t0, 0.000000e+00
@@ -111,7 +111,7 @@ good:
 
 define i1 @TestOLE(double %t0) {
 ; CHECK-LABEL: TestOLE:
-; CHECK: xscmpudp
+; CHECK: fcmpu
 ; CHECK: blr
 entry:
   %t1 = fcmp ole double %t0, 0.000000e+00
@@ -126,7 +126,7 @@ good:
 
 define i1 @TestONE(double %t0) {
 ; CHECK-LABEL: TestONE:
-; CHECK: xscmpudp
+; CHECK: fcmpu
 ; CHECK: blr
 entry:
   %t1 = fcmp one double %t0, 0.000000e+00
@@ -173,7 +173,7 @@ good:
 
 define i1 @TestOGE(double %t0) {
 ; CHECK-LABEL: TestOGE:
-; CHECK: xscmpudp
+; CHECK: fcmpu
 ; CHECK: blr
 entry:
   %t1 = fcmp oge double %t0, 0.000000e+00
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll
index 6b6703f..d6e0dde 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll
@@ -5,25 +5,19 @@
 define i32 @une_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; CHECK-LABEL: une_ppcf128:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xscmpudp cr7, f1, f3
-; CHECK-NEXT:    mfocrf r3, 1
-; CHECK-NEXT:    rlwinm r3, r3, 31, 31, 31
-; CHECK-NEXT:    xscmpudp cr7, f2, f4
-; CHECK-NEXT:    mfocrf r4, 1
-; CHECK-NEXT:    rlwinm r4, r4, 31, 31, 31
-; CHECK-NEXT:    xori r4, r4, 1
-; CHECK-NEXT:    and r4, r3, r4
-; CHECK-NEXT:    xscmpudp cr7, f1, f3
-; CHECK-NEXT:    mfocrf r3, 1
-; CHECK-NEXT:    rlwinm r3, r3, 31, 31, 31
-; CHECK-NEXT:    xori r3, r3, 1
-; CHECK-NEXT:    xscmpudp cr7, f1, f3
-; CHECK-NEXT:    mfocrf r5, 1
-; CHECK-NEXT:    rlwinm r5, r5, 31, 31, 31
-; CHECK-NEXT:    xori r5, r5, 1
-; CHECK-NEXT:    and r3, r3, r5
-; CHECK-NEXT:    or r3, r3, r4
-; CHECK-NEXT:    # kill: def $r4 killed $r3
+; CHECK-NEXT:    fcmpu cr0, f1, f3
+; CHECK-NEXT:    crmove 4*cr5+lt, eq
+; CHECK-NEXT:    fcmpu cr1, f2, f4
+; CHECK-NEXT:    crmove 4*cr5+gt, 4*cr1+eq
+; CHECK-NEXT:    crnot 4*cr5+gt, 4*cr5+gt
+; CHECK-NEXT:    crand 4*cr5+gt, 4*cr5+lt, 4*cr5+gt
+; CHECK-NEXT:    crmove 4*cr5+lt, eq
+; CHECK-NEXT:    crnot 4*cr5+lt, 4*cr5+lt
+; CHECK-NEXT:    crand 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
+; CHECK-NEXT:    cror 4*cr5+lt, 4*cr5+lt, 4*cr5+gt
+; CHECK-NEXT:    li r4, 0
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    isel r3, r3, r4, 4*cr5+lt
 ; CHECK-NEXT:    clrldi r3, r3, 32
 ; CHECK-NEXT:    blr
 entry:
@@ -36,28 +30,19 @@ entry:
 define i32 @ogt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; CHECK-LABEL: ogt_ppcf128:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xscmpudp cr7, f1, f3
-; CHECK-NEXT:    mfocrf r3, 1
-; CHECK-NEXT:    rlwinm r3, r3, 31, 31, 31
-; CHECK-NEXT:    xscmpudp cr7, f2, f4
-; CHECK-NEXT:    mfocrf r4, 1
-; CHECK-NEXT:    rlwinm r4, r4, 30, 31, 31
-; CHECK-NEXT:    and r4, r3, r4
-; CHECK-NEXT:    xscmpudp cr0, f1, f3
-; CHECK-NEXT:    mfocrf r3, 128
-; CHECK-NEXT:    stw r3, -4(r1)
-; CHECK-NEXT:    xscmpudp cr7, f1, f3
-; CHECK-NEXT:    mfocrf r3, 1
-; CHECK-NEXT:    lwz r5, -4(r1)
-; CHECK-NEXT:    rotlwi r5, r5, 4
-; CHECK-NEXT:    mtocrf 1, r5
-; CHECK-NEXT:    rlwinm r5, r3, 30, 31, 31
-; CHECK-NEXT:    mfocrf r3, 1
-; CHECK-NEXT:    rlwinm r3, r3, 31, 31, 31
-; CHECK-NEXT:    xori r3, r3, 1
-; CHECK-NEXT:    and r3, r3, r5
-; CHECK-NEXT:    or r3, r3, r4
-; CHECK-NEXT:    # kill: def $r4 killed $r3
+; CHECK-NEXT:    fcmpu cr0, f1, f3
+; CHECK-NEXT:    crmove 4*cr5+lt, eq
+; CHECK-NEXT:    fcmpu cr1, f2, f4
+; CHECK-NEXT:    crmove 4*cr5+gt, 4*cr1+gt
+; CHECK-NEXT:    crand 4*cr5+gt, 4*cr5+lt, 4*cr5+gt
+; CHECK-NEXT:    crmove 4*cr5+lt, eq
+; CHECK-NEXT:    crnot 4*cr5+lt, 4*cr5+lt
+; CHECK-NEXT:    crmove 4*cr5+eq, gt
+; CHECK-NEXT:    crand 4*cr5+lt, 4*cr5+lt, 4*cr5+eq
+; CHECK-NEXT:    cror 4*cr5+lt, 4*cr5+lt, 4*cr5+gt
+; CHECK-NEXT:    li r4, 0
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    isel r3, r3, r4, 4*cr5+lt
 ; CHECK-NEXT:    clrldi r3, r3, 32
 ; CHECK-NEXT:    blr
 entry:
@@ -69,12 +54,14 @@ entry:
 define i1 @test_f128(fp128 %a, fp128 %b) #0 {
 ; CHECK-LABEL: test_f128:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xscmpuqp cr7, v2, v3
-; CHECK-NEXT:    mfocrf r3, 1
-; CHECK-NEXT:    rlwinm r3, r3, 31, 31, 31
-; CHECK-NEXT:    xori r4, r3, 1
-; CHECK-NEXT:    # implicit-def: $x3
-; CHECK-NEXT:    mr r3, r4
+; CHECK-NEXT:    xscmpuqp cr0, v2, v3
+; CHECK-NEXT:    crmove 4*cr5+lt, eq
+; CHECK-NEXT:    xscmpuqp cr0, v2, v3
+; CHECK-NEXT:    crmove 4*cr5+gt, eq
+; CHECK-NEXT:    crnor 4*cr5+lt, 4*cr5+lt, 4*cr5+gt
+; CHECK-NEXT:    li r4, 0
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    isel r3, r3, r4, 4*cr5+lt
 ; CHECK-NEXT:    blr
 entry:
   %0 = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"une", metadata !"fpexcept.strict") #0
@@ -84,11 +71,9 @@ entry:
 define i1 @testbr_f64(double %a, double %b) #0 {
 ; CHECK-LABEL: testbr_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xscmpudp cr7, f1, f2
-; CHECK-NEXT:    mfocrf r3, 1
-; CHECK-NEXT:    rlwinm r3, r3, 31, 31, 31
-; CHECK-NEXT:    cmplwi r3, 0
-; CHECK-NEXT:    bne cr0, .LBB3_2
+; CHECK-NEXT:    fcmpu cr0, f1, f2
+; CHECK-NEXT:    crmove 4*cr5+lt, eq
+; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB3_2
 ; CHECK-NEXT:    b .LBB3_1
 ; CHECK-NEXT:  .LBB3_1: # %tr
 ; CHECK-NEXT:    li r3, -1
@@ -108,11 +93,9 @@ fl:
 define i1 @testbr_f32(float %a, float %b) #0 {
 ; CHECK-LABEL: testbr_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fcmpu cr7, f1, f2
-; CHECK-NEXT:    mfocrf r3, 1
-; CHECK-NEXT:    rlwinm r3, r3, 31, 31, 31
-; CHECK-NEXT:    cmplwi r3, 0
-; CHECK-NEXT:    bne cr0, .LBB4_2
+; CHECK-NEXT:    fcmpu cr0, f1, f2
+; CHECK-NEXT:    crmove 4*cr5+lt, eq
+; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB4_2
 ; CHECK-NEXT:    b .LBB4_1
 ; CHECK-NEXT:  .LBB4_1: # %tr
 ; CHECK-NEXT:    li r3, -1
diff --git a/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll b/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll
index 627db54..4d55737 100644
--- a/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll
+++ b/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll
@@ -7,11 +7,11 @@ define i1 @Test(double %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xscvdpsxws 0, 1
 ; CHECK-NEXT:    mffprwz 3, 0
-; CHECK-NEXT:    xori 3, 3, 65534
-; CHECK-NEXT:    cntlzw 3, 3
-; CHECK-NEXT:    srwi 4, 3, 5
-; CHECK-NEXT:    # implicit-def: $x3
-; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    cmplwi 3, 65534
+; CHECK-NEXT:    crmove 20, 2
+; CHECK-NEXT:    li 4, 0
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    isel 3, 3, 4, 20
 ; CHECK-NEXT:    blr
 entry:
   %conv = fptoui double %a to i16
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-byte-loads.ll b/llvm/test/CodeGen/PowerPC/pcrel-byte-loads.ll
index 998dc3c..3082c04 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-byte-loads.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-byte-loads.ll
@@ -45,6 +45,7 @@ define void @i32_ZextLoad_i1() {
 ; CHECK-LE-LABEL: i32_ZextLoad_i1:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    plbz r3, GlobLd1@PCREL(0), 1
+; CHECK-LE-NEXT:    clrldi r3, r3, 63
 ; CHECK-LE-NEXT:    pstb r3, GlobSt1@PCREL(0), 1
 ; CHECK-LE-NEXT:    blr
 ;
@@ -53,6 +54,7 @@ define void @i32_ZextLoad_i1() {
 ; CHECK-BE-NEXT:    addis r3, r2, GlobLd1@toc@ha
 ; CHECK-BE-NEXT:    addis r4, r2, GlobSt1@toc@ha
 ; CHECK-BE-NEXT:    lbz r3, GlobLd1@toc@l(r3)
+; CHECK-BE-NEXT:    clrldi r3, r3, 63
 ; CHECK-BE-NEXT:    stb r3, GlobSt1@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -77,11 +79,11 @@ define dso_local i1 @i32_ExtLoad_i1() local_unnamed_addr #0 {
 ; CHECK-LE-NEXT:    paddi r3, 0, Glob1@PCREL, 1
 ; CHECK-LE-NEXT:    paddi r4, 0, Glob2@PCREL, 1
 ; CHECK-LE-NEXT:    bl Decl@notoc
-; CHECK-LE-NEXT:    plbz r4, GlobLd1@PCREL(0), 1
-; CHECK-LE-NEXT:    cmplwi r3, 0
-; CHECK-LE-NEXT:    li r3, 1
-; CHECK-LE-NEXT:    iseleq r3, 0, r3
-; CHECK-LE-NEXT:    and r3, r3, r4
+; CHECK-LE-NEXT:    cmpwi cr1, r3, 0
+; CHECK-LE-NEXT:    plbz r3, GlobLd1@PCREL(0), 1
+; CHECK-LE-NEXT:    andi. r3, r3, 1
+; CHECK-LE-NEXT:    crandc 4*cr5+lt, gt, 4*cr1+eq
+; CHECK-LE-NEXT:    setbc r3, 4*cr5+lt
 ; CHECK-LE-NEXT:    addi r1, r1, 32
 ; CHECK-LE-NEXT:    ld r0, 16(r1)
 ; CHECK-LE-NEXT:    mtlr r0
@@ -100,12 +102,12 @@ define dso_local i1 @i32_ExtLoad_i1() local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    addi r4, r4, Glob2@toc@l
 ; CHECK-BE-NEXT:    bl Decl
 ; CHECK-BE-NEXT:    nop
-; CHECK-BE-NEXT:    addis r4, r2, GlobLd1@toc@ha
-; CHECK-BE-NEXT:    cmplwi r3, 0
-; CHECK-BE-NEXT:    li r3, 1
-; CHECK-BE-NEXT:    lbz r4, GlobLd1@toc@l(r4)
-; CHECK-BE-NEXT:    iseleq r3, 0, r3
-; CHECK-BE-NEXT:    and r3, r3, r4
+; CHECK-BE-NEXT:    cmpwi cr1, r3, 0
+; CHECK-BE-NEXT:    addis r3, r2, GlobLd1@toc@ha
+; CHECK-BE-NEXT:    lbz r3, GlobLd1@toc@l(r3)
+; CHECK-BE-NEXT:    andi. r3, r3, 1
+; CHECK-BE-NEXT:    crandc 4*cr5+lt, gt, 4*cr1+eq
+; CHECK-BE-NEXT:    setbc r3, 4*cr5+lt
 ; CHECK-BE-NEXT:    addi r1, r1, 112
 ; CHECK-BE-NEXT:    ld r0, 16(r1)
 ; CHECK-BE-NEXT:    mtlr r0
diff --git a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
index 066a04f..83fa3fba 100644
--- a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
@@ -227,7 +227,7 @@ define i1 @test_urem_oversized(i66 %X) nounwind {
 ; PPC-NEXT:    slwi 5, 5, 1
 ; PPC-NEXT:    add 6, 6, 9
 ; PPC-NEXT:    add 3, 5, 3
-; PPC-NEXT:    rotlwi  8, 11, 31
+; PPC-NEXT:    rotlwi 8, 11, 31
 ; PPC-NEXT:    sub 4, 6, 4
 ; PPC-NEXT:    lis 5, -5526
 ; PPC-NEXT:    rlwimi 8, 7, 31, 0, 0
@@ -235,7 +235,7 @@ define i1 @test_urem_oversized(i66 %X) nounwind {
 ; PPC-NEXT:    add 3, 4, 3
 ; PPC-NEXT:    ori 5, 5, 61135
 ; PPC-NEXT:    rlwimi 7, 3, 31, 0, 0
-; PPC-NEXT:    cmplw   8, 5
+; PPC-NEXT:    cmplw 8, 5
 ; PPC-NEXT:    cmplwi 1, 7, 13
 ; PPC-NEXT:    rlwinm 3, 3, 31, 31, 31
 ; PPC-NEXT:    crand 20, 6, 0
diff --git a/llvm/test/CodeGen/RISCV/machine-cse.ll b/llvm/test/CodeGen/RISCV/machine-cse.ll
index cd8ae1c..2e36f3c 100644
--- a/llvm/test/CodeGen/RISCV/machine-cse.ll
+++ b/llvm/test/CodeGen/RISCV/machine-cse.ll
@@ -504,3 +504,207 @@ trueblock:
 falseblock:
   ret void
 }
+
+define void @commute_fadd_f16(half %x, half %y, half* %p1, half* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fadd_f16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fadd.h ft0, fa0, fa1
+; RV32-NEXT:    fsh ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB14_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsh ft0, 0(a0)
+; RV32-NEXT:  .LBB14_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fadd_f16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fadd.h ft0, fa0, fa1
+; RV64-NEXT:    fsh ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB14_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsh ft0, 0(a0)
+; RV64-NEXT:  .LBB14_2: # %falseblock
+; RV64-NEXT:    ret
+  %a = fadd half %x, %y
+  store half %a, half* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %b = fadd half %y, %x
+  store half %b, half* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_fadd_f32(float %x, float %y, float* %p1, float* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fadd_f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fadd.s ft0, fa0, fa1
+; RV32-NEXT:    fsw ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB15_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsw ft0, 0(a0)
+; RV32-NEXT:  .LBB15_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fadd_f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fadd.s ft0, fa0, fa1
+; RV64-NEXT:    fsw ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB15_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsw ft0, 0(a0)
+; RV64-NEXT:  .LBB15_2: # %falseblock
+; RV64-NEXT:    ret
+  %a = fadd float %x, %y
+  store float %a, float* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %b = fadd float %y, %x
+  store float %b, float* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_fadd_f64(double %x, double %y, double* %p1, double* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fadd_f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fadd.d ft0, fa0, fa1
+; RV32-NEXT:    fsd ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB16_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsd ft0, 0(a0)
+; RV32-NEXT:  .LBB16_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fadd_f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fadd.d ft0, fa0, fa1
+; RV64-NEXT:    fsd ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB16_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsd ft0, 0(a0)
+; RV64-NEXT:  .LBB16_2: # %falseblock
+; RV64-NEXT:    ret
+  %a = fadd double %x, %y
+  store double %a, double* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %b = fadd double %y, %x
+  store double %b, double* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_feq_f16(half %x, half %y, i8* %p1, i8* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_feq_f16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    feq.h a1, fa0, fa1
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB17_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:  .LBB17_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_feq_f16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    feq.h a1, fa0, fa1
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB17_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:  .LBB17_2: # %falseblock
+; RV64-NEXT:    ret
+  %a = fcmp oeq half %x, %y
+  %b = zext i1 %a to i8
+  store i8 %b, i8* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %c = fcmp oeq half %y, %x
+  %d = zext i1 %c to i8
+  store i8 %d, i8* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_feq_f32(float %x, float %y, i8* %p1, i8* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_feq_f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    feq.s a1, fa0, fa1
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB18_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:  .LBB18_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_feq_f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    feq.s a1, fa0, fa1
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB18_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:  .LBB18_2: # %falseblock
+; RV64-NEXT:    ret
+  %a = fcmp oeq float %x, %y
+  %b = zext i1 %a to i8
+  store i8 %b, i8* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %c = fcmp oeq float %y, %x
+  %d = zext i1 %c to i8
+  store i8 %d, i8* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_feq_f64(double %x, double %y, i8* %p1, i8* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_feq_f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    feq.d a1, fa0, fa1
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB19_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:  .LBB19_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_feq_f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    feq.d a1, fa0, fa1
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB19_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:  .LBB19_2: # %falseblock
+; RV64-NEXT:    ret
+  %a = fcmp oeq double %x, %y
+  %b = zext i1 %a to i8
+  store i8 %b, i8* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %c = fcmp oeq double %y, %x
+  %d = zext i1 %c to i8
+  store i8 %d, i8* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll b/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll
index deac535..26c7509 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll
@@ -1431,7 +1431,9 @@ define <vscale x 1 x i1> @intrinsic_vmsge_mask_vx_nxv1i64_i64(<vscale x 1 x i64>
 ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; RV64-NEXT:    vmslt.vx v0, v8, a0, v0.t
+; RV64-NEXT:    vmslt.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
+; RV64-NEXT:    vmxor.mm v0, v8, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/split-offsets.ll b/llvm/test/CodeGen/RISCV/split-offsets.ll
index d08157b8..35671d8 100644
--- a/llvm/test/CodeGen/RISCV/split-offsets.ll
+++ b/llvm/test/CodeGen/RISCV/split-offsets.ll
@@ -118,3 +118,38 @@ while_end:
   ret void
 }
 
+; GEPs have been manually split so the base GEP does not get used by any memory
+; instructions. Make sure we use a small offset in each of the stores.
+define void @test3([65536 x i32]* %t) {
+; RV32I-LABEL: test3:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a1, 20
+; RV32I-NEXT:    addi a1, a1, -1920
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    li a1, 2
+; RV32I-NEXT:    sw a1, 4(a0)
+; RV32I-NEXT:    li a1, 3
+; RV32I-NEXT:    sw a1, 8(a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test3:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    lui a1, 20
+; RV64I-NEXT:    addiw a1, a1, -1920
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    li a1, 2
+; RV64I-NEXT:    sw a1, 4(a0)
+; RV64I-NEXT:    li a1, 3
+; RV64I-NEXT:    sw a1, 8(a0)
+; RV64I-NEXT:    ret
+entry:
+  %0 = bitcast [65536 x i32]* %t to i8*
+  %splitgep = getelementptr i8, i8* %0, i64 80000
+  %1 = getelementptr i8, i8* %splitgep, i64 4
+  %2 = bitcast i8* %1 to i32*
+  %3 = getelementptr i8, i8* %splitgep, i64 8
+  %4 = bitcast i8* %3 to i32*
+  store i32 2, i32* %2, align 4
+  store i32 3, i32* %4, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index 3d95509..e86cedc 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -4040,6 +4040,236 @@ continue:
   ret i1 true
 }
 
+define zeroext i1 @uaddo.i64.constant(i64 %v1, i64* %res) {
+; RV32-LABEL: uaddo.i64.constant:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    addi a4, a0, 2
+; RV32-NEXT:    sltu a0, a4, a0
+; RV32-NEXT:    add a5, a1, a0
+; RV32-NEXT:    bgeu a4, a3, .LBB66_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    sltu a0, a5, a1
+; RV32-NEXT:  .LBB66_2: # %entry
+; RV32-NEXT:    sw a4, 0(a2)
+; RV32-NEXT:    sw a5, 4(a2)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo.i64.constant:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 2
+; RV64-NEXT:    sltu a0, a2, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+;
+; RV32ZBA-LABEL: uaddo.i64.constant:
+; RV32ZBA:       # %bb.0: # %entry
+; RV32ZBA-NEXT:    mv a3, a0
+; RV32ZBA-NEXT:    addi a4, a0, 2
+; RV32ZBA-NEXT:    sltu a0, a4, a0
+; RV32ZBA-NEXT:    add a5, a1, a0
+; RV32ZBA-NEXT:    bgeu a4, a3, .LBB66_2
+; RV32ZBA-NEXT:  # %bb.1: # %entry
+; RV32ZBA-NEXT:    sltu a0, a5, a1
+; RV32ZBA-NEXT:  .LBB66_2: # %entry
+; RV32ZBA-NEXT:    sw a4, 0(a2)
+; RV32ZBA-NEXT:    sw a5, 4(a2)
+; RV32ZBA-NEXT:    ret
+;
+; RV64ZBA-LABEL: uaddo.i64.constant:
+; RV64ZBA:       # %bb.0: # %entry
+; RV64ZBA-NEXT:    addi a2, a0, 2
+; RV64ZBA-NEXT:    sltu a0, a2, a0
+; RV64ZBA-NEXT:    sd a2, 0(a1)
+; RV64ZBA-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i64.constant_2048(i64 %v1, i64* %res) {
+; RV32-LABEL: uaddo.i64.constant_2048:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    addi a0, a0, 1024
+; RV32-NEXT:    addi a4, a0, 1024
+; RV32-NEXT:    sltu a0, a4, a3
+; RV32-NEXT:    add a5, a1, a0
+; RV32-NEXT:    bgeu a4, a3, .LBB67_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    sltu a0, a5, a1
+; RV32-NEXT:  .LBB67_2: # %entry
+; RV32-NEXT:    sw a4, 0(a2)
+; RV32-NEXT:    sw a5, 4(a2)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo.i64.constant_2048:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 1024
+; RV64-NEXT:    addi a2, a2, 1024
+; RV64-NEXT:    sltu a0, a2, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+;
+; RV32ZBA-LABEL: uaddo.i64.constant_2048:
+; RV32ZBA:       # %bb.0: # %entry
+; RV32ZBA-NEXT:    mv a3, a0
+; RV32ZBA-NEXT:    addi a0, a0, 1024
+; RV32ZBA-NEXT:    addi a4, a0, 1024
+; RV32ZBA-NEXT:    sltu a0, a4, a3
+; RV32ZBA-NEXT:    add a5, a1, a0
+; RV32ZBA-NEXT:    bgeu a4, a3, .LBB67_2
+; RV32ZBA-NEXT:  # %bb.1: # %entry
+; RV32ZBA-NEXT:    sltu a0, a5, a1
+; RV32ZBA-NEXT:  .LBB67_2: # %entry
+; RV32ZBA-NEXT:    sw a4, 0(a2)
+; RV32ZBA-NEXT:    sw a5, 4(a2)
+; RV32ZBA-NEXT:    ret
+;
+; RV64ZBA-LABEL: uaddo.i64.constant_2048:
+; RV64ZBA:       # %bb.0: # %entry
+; RV64ZBA-NEXT:    addi a2, a0, 1024
+; RV64ZBA-NEXT:    addi a2, a2, 1024
+; RV64ZBA-NEXT:    sltu a0, a2, a0
+; RV64ZBA-NEXT:    sd a2, 0(a1)
+; RV64ZBA-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2048)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i64.constant_2049(i64 %v1, i64* %res) {
+; RV32-LABEL: uaddo.i64.constant_2049:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    addi a0, a0, 1025
+; RV32-NEXT:    addi a4, a0, 1024
+; RV32-NEXT:    sltu a0, a4, a3
+; RV32-NEXT:    add a5, a1, a0
+; RV32-NEXT:    bgeu a4, a3, .LBB68_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    sltu a0, a5, a1
+; RV32-NEXT:  .LBB68_2: # %entry
+; RV32-NEXT:    sw a4, 0(a2)
+; RV32-NEXT:    sw a5, 4(a2)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo.i64.constant_2049:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 1025
+; RV64-NEXT:    addi a2, a2, 1024
+; RV64-NEXT:    sltu a0, a2, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+;
+; RV32ZBA-LABEL: uaddo.i64.constant_2049:
+; RV32ZBA:       # %bb.0: # %entry
+; RV32ZBA-NEXT:    mv a3, a0
+; RV32ZBA-NEXT:    addi a0, a0, 1025
+; RV32ZBA-NEXT:    addi a4, a0, 1024
+; RV32ZBA-NEXT:    sltu a0, a4, a3
+; RV32ZBA-NEXT:    add a5, a1, a0
+; RV32ZBA-NEXT:    bgeu a4, a3, .LBB68_2
+; RV32ZBA-NEXT:  # %bb.1: # %entry
+; RV32ZBA-NEXT:    sltu a0, a5, a1
+; RV32ZBA-NEXT:  .LBB68_2: # %entry
+; RV32ZBA-NEXT:    sw a4, 0(a2)
+; RV32ZBA-NEXT:    sw a5, 4(a2)
+; RV32ZBA-NEXT:    ret
+;
+; RV64ZBA-LABEL: uaddo.i64.constant_2049:
+; RV64ZBA:       # %bb.0: # %entry
+; RV64ZBA-NEXT:    addi a2, a0, 1025
+; RV64ZBA-NEXT:    addi a2, a2, 1024
+; RV64ZBA-NEXT:    sltu a0, a2, a0
+; RV64ZBA-NEXT:    sd a2, 0(a1)
+; RV64ZBA-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2049)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+define i64 @uaddo.i64.constant_setcc_on_overflow_flag(i64* %p) {
+; RV32-LABEL: uaddo.i64.constant_setcc_on_overflow_flag:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    lw a4, 0(a0)
+; RV32-NEXT:    lw a3, 4(a0)
+; RV32-NEXT:    addi a0, a4, 2
+; RV32-NEXT:    sltu a2, a0, a4
+; RV32-NEXT:    add a1, a3, a2
+; RV32-NEXT:    bltu a0, a4, .LBB69_3
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    beqz a2, .LBB69_4
+; RV32-NEXT:  .LBB69_2: # %IfNoOverflow
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB69_3: # %entry
+; RV32-NEXT:    sltu a2, a1, a3
+; RV32-NEXT:    bnez a2, .LBB69_2
+; RV32-NEXT:  .LBB69_4: # %IfOverflow
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo.i64.constant_setcc_on_overflow_flag:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    ld a1, 0(a0)
+; RV64-NEXT:    addi a0, a1, 2
+; RV64-NEXT:    bltu a0, a1, .LBB69_2
+; RV64-NEXT:  # %bb.1: # %IfOverflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:  .LBB69_2: # %IfNoOverflow
+; RV64-NEXT:    ret
+;
+; RV32ZBA-LABEL: uaddo.i64.constant_setcc_on_overflow_flag:
+; RV32ZBA:       # %bb.0: # %entry
+; RV32ZBA-NEXT:    lw a4, 0(a0)
+; RV32ZBA-NEXT:    lw a3, 4(a0)
+; RV32ZBA-NEXT:    addi a0, a4, 2
+; RV32ZBA-NEXT:    sltu a2, a0, a4
+; RV32ZBA-NEXT:    add a1, a3, a2
+; RV32ZBA-NEXT:    bltu a0, a4, .LBB69_3
+; RV32ZBA-NEXT:  # %bb.1: # %entry
+; RV32ZBA-NEXT:    beqz a2, .LBB69_4
+; RV32ZBA-NEXT:  .LBB69_2: # %IfNoOverflow
+; RV32ZBA-NEXT:    ret
+; RV32ZBA-NEXT:  .LBB69_3: # %entry
+; RV32ZBA-NEXT:    sltu a2, a1, a3
+; RV32ZBA-NEXT:    bnez a2, .LBB69_2
+; RV32ZBA-NEXT:  .LBB69_4: # %IfOverflow
+; RV32ZBA-NEXT:    li a0, 0
+; RV32ZBA-NEXT:    li a1, 0
+; RV32ZBA-NEXT:    ret
+;
+; RV64ZBA-LABEL: uaddo.i64.constant_setcc_on_overflow_flag:
+; RV64ZBA:       # %bb.0: # %entry
+; RV64ZBA-NEXT:    ld a1, 0(a0)
+; RV64ZBA-NEXT:    addi a0, a1, 2
+; RV64ZBA-NEXT:    bltu a0, a1, .LBB69_2
+; RV64ZBA-NEXT:  # %bb.1: # %IfOverflow
+; RV64ZBA-NEXT:    li a0, 0
+; RV64ZBA-NEXT:  .LBB69_2: # %IfNoOverflow
+; RV64ZBA-NEXT:    ret
+entry:
+  %v1 = load i64, i64* %p
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %IfNoOverflow, label %IfOverflow
+IfOverflow:
+  ret i64 0
+IfNoOverflow:
+  ret i64 %val
+}
+
 declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
 declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
 declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/Thumb/segmented-stacks.ll b/llvm/test/CodeGen/Thumb/segmented-stacks.ll
index 6ad8cf9..daebafe 100644
--- a/llvm/test/CodeGen/Thumb/segmented-stacks.ll
+++ b/llvm/test/CodeGen/Thumb/segmented-stacks.ll
@@ -275,4 +275,24 @@ define void @test_nostack() #0 {
 ; Thumb-linux-NOT:   bl __morestack
 }
 
+
+declare void @panic() unnamed_addr
+
+; We used to crash while compiling the following function.
+; Thumb-linux-LABEL: build_should_not_segfault:
+; Thumb-android-LABEL: build_should_not_segfault:
+define void @build_should_not_segfault(i8 %x) unnamed_addr #0 {
+start:
+  %_0 = icmp ult i8 %x, 16
+  %or.cond = select i1 undef, i1 true, i1 %_0
+  br i1 %or.cond, label %bb1, label %bb2
+
+bb1:
+  ret void
+
+bb2:
+  call void @panic()
+  unreachable
+}
+
 attributes #0 = { "split-stack" }
diff --git a/llvm/test/CodeGen/Thumb2/segmented-stacks.ll b/llvm/test/CodeGen/Thumb2/segmented-stacks.ll
index 0f34e9c..70892073 100644
--- a/llvm/test/CodeGen/Thumb2/segmented-stacks.ll
+++ b/llvm/test/CodeGen/Thumb2/segmented-stacks.ll
@@ -171,4 +171,24 @@ define fastcc void @test_fastcc_large() #0 {
 ; ARM-NEXT:    .long   40192
 }
 
+
+declare void @panic() unnamed_addr
+
+; We used to crash while compiling the following function.
+; THUMB-LABEL: build_should_not_segfault:
+; ARM-LABEL: build_should_not_segfault:
+define void @build_should_not_segfault(i8 %x) unnamed_addr #0 {
+start:
+  %_0 = icmp ult i8 %x, 16
+  %or.cond = select i1 undef, i1 true, i1 %_0
+  br i1 %or.cond, label %bb1, label %bb2
+
+bb1:
+  ret void
+
+bb2:
+  call void @panic()
+  unreachable
+}
+
 attributes #0 = { "split-stack" }
diff --git a/llvm/test/CodeGen/X86/add-and-not.ll b/llvm/test/CodeGen/X86/add-and-not.ll
new file mode 100644
index 0000000..bf8e507
--- /dev/null
+++ b/llvm/test/CodeGen/X86/add-and-not.ll
@@ -0,0 +1,205 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+
+declare void @use(i8)
+
+define i8 @add_and_xor(i8 %x, i8 %y) {
+; CHECK-LABEL: add_and_xor:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+  %xor = xor i8 %x, -1
+  %and = and i8 %xor, %y
+  %add = add i8 %and, %x
+  ret i8 %add
+}
+
+define i8 @add_and_xor_wrong_const(i8 %x, i8 %y) {
+; CHECK-LABEL: add_and_xor_wrong_const:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    xorb $-2, %al
+; CHECK-NEXT:    andb %sil, %al
+; CHECK-NEXT:    addb %dil, %al
+; CHECK-NEXT:    retq
+  %xor = xor i8 %x, -2
+  %and = and i8 %xor, %y
+  %add = add i8 %and, %x
+  ret i8 %add
+}
+
+define i8 @add_and_xor_wrong_op(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: add_and_xor_wrong_op:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    notb %dl
+; CHECK-NEXT:    andb %sil, %dl
+; CHECK-NEXT:    leal (%rdx,%rdi), %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+  %xor = xor i8 %z, -1
+  %and = and i8 %xor, %y
+  %add = add i8 %and, %x
+  ret i8 %add
+}
+
+define i8 @add_and_xor_commuted1(i8 %x, i8 %y) {
+; CHECK-LABEL: add_and_xor_commuted1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+  %xor = xor i8 %x, -1
+  %and = and i8 %y, %xor
+  %add = add i8 %and, %x
+  ret i8 %add
+}
+
+define i8 @add_and_xor_commuted2(i8 %x, i8 %y) {
+; CHECK-LABEL: add_and_xor_commuted2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+  %xor = xor i8 %x, -1
+  %and = and i8 %xor, %y
+  %add = add i8 %x, %and
+  ret i8 %add
+}
+
+define i8 @add_and_xor_commuted3(i8 %x, i8 %y) {
+; CHECK-LABEL: add_and_xor_commuted3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+  %xor = xor i8 %x, -1
+  %and = and i8 %y, %xor
+  %add = add i8 %x, %and
+  ret i8 %add
+}
+
+define i8 @add_and_xor_extra_use(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: add_and_xor_extra_use:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:    movl %edi, %r14d
+; CHECK-NEXT:    movl %r14d, %eax
+; CHECK-NEXT:    notb %al
+; CHECK-NEXT:    movzbl %al, %ebp
+; CHECK-NEXT:    movl %ebp, %edi
+; CHECK-NEXT:    callq use@PLT
+; CHECK-NEXT:    andb %bl, %bpl
+; CHECK-NEXT:    movzbl %bpl, %edi
+; CHECK-NEXT:    callq use@PLT
+; CHECK-NEXT:    orb %r14b, %bl
+; CHECK-NEXT:    movl %ebx, %eax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    retq
+  %xor = xor i8 %x, -1
+  call void @use(i8 %xor)
+  %and = and i8 %xor, %y
+  call void @use(i8 %and)
+  %add = add i8 %and, %x
+  ret i8 %add
+}
+
+define i64 @add_and_xor_const(i64 %x) {
+; CHECK-LABEL: add_and_xor_const:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    notl %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    retq
+  %xor = xor i64 %x, -1
+  %and = and i64 %xor, 1
+  %add = add i64 %and, %x
+  ret i64 %add
+}
+
+define i64 @add_and_xor_const_wrong_op(i64 %x, i64 %y) {
+; CHECK-LABEL: add_and_xor_const_wrong_op:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notl %esi
+; CHECK-NEXT:    andl $1, %esi
+; CHECK-NEXT:    leaq (%rsi,%rdi), %rax
+; CHECK-NEXT:    retq
+  %xor = xor i64 %y, -1
+  %and = and i64 %xor, 1
+  %add = add i64 %and, %x
+  ret i64 %add
+}
+
+define i64 @add_and_xor_const_explicit_trunc(i64 %x) {
+; CHECK-LABEL: add_and_xor_const_explicit_trunc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    notl %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    retq
+  %trunc = trunc i64 %x to i32
+  %xor = xor i32 %trunc, -1
+  %ext = sext i32 %xor to i64
+  %and = and i64 %ext, 1
+  %add = add i64 %and, %x
+  ret i64 %add
+}
+
+define i64 @add_and_xor_const_explicit_trunc_wrong_mask(i64 %x) {
+; CHECK-LABEL: add_and_xor_const_explicit_trunc_wrong_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    notl %eax
+; CHECK-NEXT:    movslq %eax, %rcx
+; CHECK-NEXT:    movabsq $4294967297, %rax # imm = 0x100000001
+; CHECK-NEXT:    andq %rcx, %rax
+; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    retq
+  %trunc = trunc i64 %x to i32
+  %xor = xor i32 %trunc, -1
+  %ext = sext i32 %xor to i64
+  %and = and i64 %ext, 4294967297
+  %add = add i64 %and, %x
+  ret i64 %add
+}
+
+define i8* @gep_and_xor(i8* %a, i64 %m) {
+; CHECK-LABEL: gep_and_xor:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    orq %rsi, %rax
+; CHECK-NEXT:    retq
+  %old = ptrtoint i8* %a to i64
+  %old.not = and i64 %old, %m
+  %offset = xor i64 %old.not, %m
+  %p = getelementptr i8, i8* %a, i64 %offset
+  ret i8* %p
+}
+
+define i8* @gep_and_xor_const(i8* %a) {
+; CHECK-LABEL: gep_and_xor_const:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    notl %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    retq
+  %old = ptrtoint i8* %a to i64
+  %old.not = and i64 %old, 1
+  %offset = xor i64 %old.not, 1
+  %p = getelementptr i8, i8* %a, i64 %offset
+  ret i8* %p
+}
diff --git a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
index d081a2e..d04819a 100644
--- a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
+++ b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
 
 ; The mask is all-ones, potentially shifted.
@@ -11,12 +11,12 @@
 ; lshr
 
 define i8 @test_i8_7_mask_lshr_1(i8 %a0) {
-; X32-LABEL: test_i8_7_mask_lshr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $6, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_7_mask_lshr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $6, %al
+; X86-NEXT:    shrb %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_7_mask_lshr_1:
 ; X64:       # %bb.0:
@@ -31,12 +31,12 @@ define i8 @test_i8_7_mask_lshr_1(i8 %a0) {
 }
 
 define i8 @test_i8_28_mask_lshr_1(i8 %a0) {
-; X32-LABEL: test_i8_28_mask_lshr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $28, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_28_mask_lshr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $28, %al
+; X86-NEXT:    shrb %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_28_mask_lshr_1:
 ; X64:       # %bb.0:
@@ -50,12 +50,12 @@ define i8 @test_i8_28_mask_lshr_1(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_28_mask_lshr_2(i8 %a0) {
-; X32-LABEL: test_i8_28_mask_lshr_2:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $28, %al
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_28_mask_lshr_2:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $28, %al
+; X86-NEXT:    shrb $2, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_28_mask_lshr_2:
 ; X64:       # %bb.0:
@@ -69,12 +69,12 @@ define i8 @test_i8_28_mask_lshr_2(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_28_mask_lshr_3(i8 %a0) {
-; X32-LABEL: test_i8_28_mask_lshr_3:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $24, %al
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_28_mask_lshr_3:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $24, %al
+; X86-NEXT:    shrb $3, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_28_mask_lshr_3:
 ; X64:       # %bb.0:
@@ -88,12 +88,12 @@ define i8 @test_i8_28_mask_lshr_3(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_28_mask_lshr_4(i8 %a0) {
-; X32-LABEL: test_i8_28_mask_lshr_4:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $16, %al
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_28_mask_lshr_4:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $16, %al
+; X86-NEXT:    shrb $4, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_28_mask_lshr_4:
 ; X64:       # %bb.0:
@@ -108,12 +108,12 @@ define i8 @test_i8_28_mask_lshr_4(i8 %a0) {
 }
 
 define i8 @test_i8_224_mask_lshr_1(i8 %a0) {
-; X32-LABEL: test_i8_224_mask_lshr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $-32, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_224_mask_lshr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $-32, %al
+; X86-NEXT:    shrb %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_224_mask_lshr_1:
 ; X64:       # %bb.0:
@@ -127,12 +127,12 @@ define i8 @test_i8_224_mask_lshr_1(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_224_mask_lshr_4(i8 %a0) {
-; X32-LABEL: test_i8_224_mask_lshr_4:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $-32, %al
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_224_mask_lshr_4:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $-32, %al
+; X86-NEXT:    shrb $4, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_224_mask_lshr_4:
 ; X64:       # %bb.0:
@@ -146,11 +146,11 @@ define i8 @test_i8_224_mask_lshr_4(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_224_mask_lshr_5(i8 %a0) {
-; X32-LABEL: test_i8_224_mask_lshr_5:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_224_mask_lshr_5:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    shrb $5, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_224_mask_lshr_5:
 ; X64:       # %bb.0:
@@ -163,11 +163,11 @@ define i8 @test_i8_224_mask_lshr_5(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_224_mask_lshr_6(i8 %a0) {
-; X32-LABEL: test_i8_224_mask_lshr_6:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_224_mask_lshr_6:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    shrb $6, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_224_mask_lshr_6:
 ; X64:       # %bb.0:
@@ -183,12 +183,12 @@ define i8 @test_i8_224_mask_lshr_6(i8 %a0) {
 ; ashr
 
 define i8 @test_i8_7_mask_ashr_1(i8 %a0) {
-; X32-LABEL: test_i8_7_mask_ashr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $6, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_7_mask_ashr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $6, %al
+; X86-NEXT:    shrb %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_7_mask_ashr_1:
 ; X64:       # %bb.0:
@@ -203,12 +203,12 @@ define i8 @test_i8_7_mask_ashr_1(i8 %a0) {
 }
 
 define i8 @test_i8_28_mask_ashr_1(i8 %a0) {
-; X32-LABEL: test_i8_28_mask_ashr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $28, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_28_mask_ashr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $28, %al
+; X86-NEXT:    shrb %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_28_mask_ashr_1:
 ; X64:       # %bb.0:
@@ -222,12 +222,12 @@ define i8 @test_i8_28_mask_ashr_1(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_28_mask_ashr_2(i8 %a0) {
-; X32-LABEL: test_i8_28_mask_ashr_2:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $28, %al
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_28_mask_ashr_2:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $28, %al
+; X86-NEXT:    shrb $2, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_28_mask_ashr_2:
 ; X64:       # %bb.0:
@@ -241,12 +241,12 @@ define i8 @test_i8_28_mask_ashr_2(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_28_mask_ashr_3(i8 %a0) {
-; X32-LABEL: test_i8_28_mask_ashr_3:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $24, %al
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_28_mask_ashr_3:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $24, %al
+; X86-NEXT:    shrb $3, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_28_mask_ashr_3:
 ; X64:       # %bb.0:
@@ -260,12 +260,12 @@ define i8 @test_i8_28_mask_ashr_3(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_28_mask_ashr_4(i8 %a0) {
-; X32-LABEL: test_i8_28_mask_ashr_4:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $16, %al
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_28_mask_ashr_4:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $16, %al
+; X86-NEXT:    shrb $4, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_28_mask_ashr_4:
 ; X64:       # %bb.0:
@@ -280,12 +280,12 @@ define i8 @test_i8_28_mask_ashr_4(i8 %a0) {
 }
 
 define i8 @test_i8_224_mask_ashr_1(i8 %a0) {
-; X32-LABEL: test_i8_224_mask_ashr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $-32, %al
-; X32-NEXT:    sarb %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_224_mask_ashr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $-32, %al
+; X86-NEXT:    sarb %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_224_mask_ashr_1:
 ; X64:       # %bb.0:
@@ -299,12 +299,12 @@ define i8 @test_i8_224_mask_ashr_1(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_224_mask_ashr_4(i8 %a0) {
-; X32-LABEL: test_i8_224_mask_ashr_4:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $-32, %al
-; X32-NEXT:    sarb $4, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_224_mask_ashr_4:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $-32, %al
+; X86-NEXT:    sarb $4, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_224_mask_ashr_4:
 ; X64:       # %bb.0:
@@ -318,11 +318,11 @@ define i8 @test_i8_224_mask_ashr_4(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_224_mask_ashr_5(i8 %a0) {
-; X32-LABEL: test_i8_224_mask_ashr_5:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    sarb $5, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_224_mask_ashr_5:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    sarb $5, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_224_mask_ashr_5:
 ; X64:       # %bb.0:
@@ -335,11 +335,11 @@ define i8 @test_i8_224_mask_ashr_5(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_224_mask_ashr_6(i8 %a0) {
-; X32-LABEL: test_i8_224_mask_ashr_6:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    sarb $6, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_224_mask_ashr_6:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    sarb $6, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_224_mask_ashr_6:
 ; X64:       # %bb.0:
@@ -355,12 +355,12 @@ define i8 @test_i8_224_mask_ashr_6(i8 %a0) {
 ; shl
 
 define i8 @test_i8_7_mask_shl_1(i8 %a0) {
-; X32-LABEL: test_i8_7_mask_shl_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $7, %al
-; X32-NEXT:    addb %al, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_7_mask_shl_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $7, %al
+; X86-NEXT:    addb %al, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_7_mask_shl_1:
 ; X64:       # %bb.0:
@@ -374,12 +374,12 @@ define i8 @test_i8_7_mask_shl_1(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_7_mask_shl_4(i8 %a0) {
-; X32-LABEL: test_i8_7_mask_shl_4:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $7, %al
-; X32-NEXT:    shlb $4, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_7_mask_shl_4:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $7, %al
+; X86-NEXT:    shlb $4, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_7_mask_shl_4:
 ; X64:       # %bb.0:
@@ -393,11 +393,11 @@ define i8 @test_i8_7_mask_shl_4(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_7_mask_shl_5(i8 %a0) {
-; X32-LABEL: test_i8_7_mask_shl_5:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    shlb $5, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_7_mask_shl_5:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    shlb $5, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_7_mask_shl_5:
 ; X64:       # %bb.0:
@@ -410,11 +410,11 @@ define i8 @test_i8_7_mask_shl_5(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_7_mask_shl_6(i8 %a0) {
-; X32-LABEL: test_i8_7_mask_shl_6:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    shlb $6, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_7_mask_shl_6:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    shlb $6, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_7_mask_shl_6:
 ; X64:       # %bb.0:
@@ -428,12 +428,12 @@ define i8 @test_i8_7_mask_shl_6(i8 %a0) {
 }
 
 define i8 @test_i8_28_mask_shl_1(i8 %a0) {
-; X32-LABEL: test_i8_28_mask_shl_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $28, %al
-; X32-NEXT:    addb %al, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_28_mask_shl_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $28, %al
+; X86-NEXT:    addb %al, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_28_mask_shl_1:
 ; X64:       # %bb.0:
@@ -447,12 +447,12 @@ define i8 @test_i8_28_mask_shl_1(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_28_mask_shl_2(i8 %a0) {
-; X32-LABEL: test_i8_28_mask_shl_2:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $28, %al
-; X32-NEXT:    shlb $2, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_28_mask_shl_2:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $28, %al
+; X86-NEXT:    shlb $2, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_28_mask_shl_2:
 ; X64:       # %bb.0:
@@ -466,12 +466,12 @@ define i8 @test_i8_28_mask_shl_2(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_28_mask_shl_3(i8 %a0) {
-; X32-LABEL: test_i8_28_mask_shl_3:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $28, %al
-; X32-NEXT:    shlb $3, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_28_mask_shl_3:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $28, %al
+; X86-NEXT:    shlb $3, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_28_mask_shl_3:
 ; X64:       # %bb.0:
@@ -485,12 +485,12 @@ define i8 @test_i8_28_mask_shl_3(i8 %a0) {
   ret i8 %t1
 }
 define i8 @test_i8_28_mask_shl_4(i8 %a0) {
-; X32-LABEL: test_i8_28_mask_shl_4:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $12, %al
-; X32-NEXT:    shlb $4, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_28_mask_shl_4:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $12, %al
+; X86-NEXT:    shlb $4, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_28_mask_shl_4:
 ; X64:       # %bb.0:
@@ -505,12 +505,12 @@ define i8 @test_i8_28_mask_shl_4(i8 %a0) {
 }
 
 define i8 @test_i8_224_mask_shl_1(i8 %a0) {
-; X32-LABEL: test_i8_224_mask_shl_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $96, %al
-; X32-NEXT:    addb %al, %al
-; X32-NEXT:    retl
+; X86-LABEL: test_i8_224_mask_shl_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $96, %al
+; X86-NEXT:    addb %al, %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8_224_mask_shl_1:
 ; X64:       # %bb.0:
@@ -531,13 +531,13 @@ define i8 @test_i8_224_mask_shl_1(i8 %a0) {
 ; lshr
 
 define i16 @test_i16_127_mask_lshr_1(i16 %a0) {
-; X32-LABEL: test_i16_127_mask_lshr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $126, %eax
-; X32-NEXT:    shrl %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_127_mask_lshr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $126, %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_127_mask_lshr_1:
 ; X64:       # %bb.0:
@@ -552,13 +552,13 @@ define i16 @test_i16_127_mask_lshr_1(i16 %a0) {
 }
 
 define i16 @test_i16_2032_mask_lshr_3(i16 %a0) {
-; X32-LABEL: test_i16_2032_mask_lshr_3:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $2032, %eax # imm = 0x7F0
-; X32-NEXT:    shrl $3, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_2032_mask_lshr_3:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $2032, %eax # imm = 0x7F0
+; X86-NEXT:    shrl $3, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_2032_mask_lshr_3:
 ; X64:       # %bb.0:
@@ -572,13 +572,13 @@ define i16 @test_i16_2032_mask_lshr_3(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_2032_mask_lshr_4(i16 %a0) {
-; X32-LABEL: test_i16_2032_mask_lshr_4:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $4, %eax
-; X32-NEXT:    andl $127, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_2032_mask_lshr_4:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $127, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_2032_mask_lshr_4:
 ; X64:       # %bb.0:
@@ -592,13 +592,13 @@ define i16 @test_i16_2032_mask_lshr_4(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_2032_mask_lshr_5(i16 %a0) {
-; X32-LABEL: test_i16_2032_mask_lshr_5:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $5, %eax
-; X32-NEXT:    andl $63, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_2032_mask_lshr_5:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $5, %eax
+; X86-NEXT:    andl $63, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_2032_mask_lshr_5:
 ; X64:       # %bb.0:
@@ -612,13 +612,13 @@ define i16 @test_i16_2032_mask_lshr_5(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_2032_mask_lshr_6(i16 %a0) {
-; X32-LABEL: test_i16_2032_mask_lshr_6:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $6, %eax
-; X32-NEXT:    andl $31, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_2032_mask_lshr_6:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $6, %eax
+; X86-NEXT:    andl $31, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_2032_mask_lshr_6:
 ; X64:       # %bb.0:
@@ -633,13 +633,13 @@ define i16 @test_i16_2032_mask_lshr_6(i16 %a0) {
 }
 
 define i16 @test_i16_65024_mask_lshr_1(i16 %a0) {
-; X32-LABEL: test_i16_65024_mask_lshr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $65024, %eax # imm = 0xFE00
-; X32-NEXT:    shrl %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_65024_mask_lshr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $65024, %eax # imm = 0xFE00
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_65024_mask_lshr_1:
 ; X64:       # %bb.0:
@@ -653,13 +653,13 @@ define i16 @test_i16_65024_mask_lshr_1(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_65024_mask_lshr_8(i16 %a0) {
-; X32-LABEL: test_i16_65024_mask_lshr_8:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $65024, %eax # imm = 0xFE00
-; X32-NEXT:    shrl $8, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_65024_mask_lshr_8:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $65024, %eax # imm = 0xFE00
+; X86-NEXT:    shrl $8, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_65024_mask_lshr_8:
 ; X64:       # %bb.0:
@@ -673,12 +673,12 @@ define i16 @test_i16_65024_mask_lshr_8(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_65024_mask_lshr_9(i16 %a0) {
-; X32-LABEL: test_i16_65024_mask_lshr_9:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $9, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_65024_mask_lshr_9:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $9, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_65024_mask_lshr_9:
 ; X64:       # %bb.0:
@@ -691,12 +691,12 @@ define i16 @test_i16_65024_mask_lshr_9(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_65024_mask_lshr_10(i16 %a0) {
-; X32-LABEL: test_i16_65024_mask_lshr_10:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $10, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_65024_mask_lshr_10:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $10, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_65024_mask_lshr_10:
 ; X64:       # %bb.0:
@@ -712,13 +712,13 @@ define i16 @test_i16_65024_mask_lshr_10(i16 %a0) {
 ; ashr
 
 define i16 @test_i16_127_mask_ashr_1(i16 %a0) {
-; X32-LABEL: test_i16_127_mask_ashr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $126, %eax
-; X32-NEXT:    shrl %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_127_mask_ashr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $126, %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_127_mask_ashr_1:
 ; X64:       # %bb.0:
@@ -733,13 +733,13 @@ define i16 @test_i16_127_mask_ashr_1(i16 %a0) {
 }
 
 define i16 @test_i16_2032_mask_ashr_3(i16 %a0) {
-; X32-LABEL: test_i16_2032_mask_ashr_3:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $2032, %eax # imm = 0x7F0
-; X32-NEXT:    shrl $3, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_2032_mask_ashr_3:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $2032, %eax # imm = 0x7F0
+; X86-NEXT:    shrl $3, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_2032_mask_ashr_3:
 ; X64:       # %bb.0:
@@ -753,13 +753,13 @@ define i16 @test_i16_2032_mask_ashr_3(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_2032_mask_ashr_4(i16 %a0) {
-; X32-LABEL: test_i16_2032_mask_ashr_4:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $4, %eax
-; X32-NEXT:    andl $127, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_2032_mask_ashr_4:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $127, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_2032_mask_ashr_4:
 ; X64:       # %bb.0:
@@ -773,13 +773,13 @@ define i16 @test_i16_2032_mask_ashr_4(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_2032_mask_ashr_5(i16 %a0) {
-; X32-LABEL: test_i16_2032_mask_ashr_5:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $5, %eax
-; X32-NEXT:    andl $63, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_2032_mask_ashr_5:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $5, %eax
+; X86-NEXT:    andl $63, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_2032_mask_ashr_5:
 ; X64:       # %bb.0:
@@ -793,13 +793,13 @@ define i16 @test_i16_2032_mask_ashr_5(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_2032_mask_ashr_6(i16 %a0) {
-; X32-LABEL: test_i16_2032_mask_ashr_6:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $6, %eax
-; X32-NEXT:    andl $31, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_2032_mask_ashr_6:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $6, %eax
+; X86-NEXT:    andl $31, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_2032_mask_ashr_6:
 ; X64:       # %bb.0:
@@ -814,14 +814,14 @@ define i16 @test_i16_2032_mask_ashr_6(i16 %a0) {
 }
 
 define i16 @test_i16_65024_mask_ashr_1(i16 %a0) {
-; X32-LABEL: test_i16_65024_mask_ashr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $65024, %eax # imm = 0xFE00
-; X32-NEXT:    cwtl
-; X32-NEXT:    shrl %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_65024_mask_ashr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $65024, %eax # imm = 0xFE00
+; X86-NEXT:    cwtl
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_65024_mask_ashr_1:
 ; X64:       # %bb.0:
@@ -835,14 +835,14 @@ define i16 @test_i16_65024_mask_ashr_1(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_65024_mask_ashr_8(i16 %a0) {
-; X32-LABEL: test_i16_65024_mask_ashr_8:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $65024, %eax # imm = 0xFE00
-; X32-NEXT:    cwtl
-; X32-NEXT:    shrl $8, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_65024_mask_ashr_8:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $65024, %eax # imm = 0xFE00
+; X86-NEXT:    cwtl
+; X86-NEXT:    shrl $8, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_65024_mask_ashr_8:
 ; X64:       # %bb.0:
@@ -856,12 +856,12 @@ define i16 @test_i16_65024_mask_ashr_8(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_65024_mask_ashr_9(i16 %a0) {
-; X32-LABEL: test_i16_65024_mask_ashr_9:
-; X32:       # %bb.0:
-; X32-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $9, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_65024_mask_ashr_9:
+; X86:       # %bb.0:
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $9, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_65024_mask_ashr_9:
 ; X64:       # %bb.0:
@@ -874,12 +874,12 @@ define i16 @test_i16_65024_mask_ashr_9(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_65024_mask_ashr_10(i16 %a0) {
-; X32-LABEL: test_i16_65024_mask_ashr_10:
-; X32:       # %bb.0:
-; X32-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $10, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_65024_mask_ashr_10:
+; X86:       # %bb.0:
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $10, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_65024_mask_ashr_10:
 ; X64:       # %bb.0:
@@ -895,13 +895,13 @@ define i16 @test_i16_65024_mask_ashr_10(i16 %a0) {
 ; shl
 
 define i16 @test_i16_127_mask_shl_1(i16 %a0) {
-; X32-LABEL: test_i16_127_mask_shl_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $127, %eax
-; X32-NEXT:    addl %eax, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_127_mask_shl_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $127, %eax
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_127_mask_shl_1:
 ; X64:       # %bb.0:
@@ -915,13 +915,13 @@ define i16 @test_i16_127_mask_shl_1(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_127_mask_shl_8(i16 %a0) {
-; X32-LABEL: test_i16_127_mask_shl_8:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $127, %eax
-; X32-NEXT:    shll $8, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_127_mask_shl_8:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $127, %eax
+; X86-NEXT:    shll $8, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_127_mask_shl_8:
 ; X64:       # %bb.0:
@@ -935,12 +935,12 @@ define i16 @test_i16_127_mask_shl_8(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_127_mask_shl_9(i16 %a0) {
-; X32-LABEL: test_i16_127_mask_shl_9:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $9, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_127_mask_shl_9:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $9, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_127_mask_shl_9:
 ; X64:       # %bb.0:
@@ -953,12 +953,12 @@ define i16 @test_i16_127_mask_shl_9(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_127_mask_shl_10(i16 %a0) {
-; X32-LABEL: test_i16_127_mask_shl_10:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $10, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_127_mask_shl_10:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $10, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_127_mask_shl_10:
 ; X64:       # %bb.0:
@@ -972,13 +972,13 @@ define i16 @test_i16_127_mask_shl_10(i16 %a0) {
 }
 
 define i16 @test_i16_2032_mask_shl_3(i16 %a0) {
-; X32-LABEL: test_i16_2032_mask_shl_3:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $2032, %eax # imm = 0x7F0
-; X32-NEXT:    shll $3, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_2032_mask_shl_3:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $2032, %eax # imm = 0x7F0
+; X86-NEXT:    shll $3, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_2032_mask_shl_3:
 ; X64:       # %bb.0:
@@ -992,13 +992,13 @@ define i16 @test_i16_2032_mask_shl_3(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_2032_mask_shl_4(i16 %a0) {
-; X32-LABEL: test_i16_2032_mask_shl_4:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $2032, %eax # imm = 0x7F0
-; X32-NEXT:    shll $4, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_2032_mask_shl_4:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $2032, %eax # imm = 0x7F0
+; X86-NEXT:    shll $4, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_2032_mask_shl_4:
 ; X64:       # %bb.0:
@@ -1012,13 +1012,13 @@ define i16 @test_i16_2032_mask_shl_4(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_2032_mask_shl_5(i16 %a0) {
-; X32-LABEL: test_i16_2032_mask_shl_5:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $2032, %eax # imm = 0x7F0
-; X32-NEXT:    shll $5, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_2032_mask_shl_5:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $2032, %eax # imm = 0x7F0
+; X86-NEXT:    shll $5, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_2032_mask_shl_5:
 ; X64:       # %bb.0:
@@ -1032,13 +1032,13 @@ define i16 @test_i16_2032_mask_shl_5(i16 %a0) {
   ret i16 %t1
 }
 define i16 @test_i16_2032_mask_shl_6(i16 %a0) {
-; X32-LABEL: test_i16_2032_mask_shl_6:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $1008, %eax # imm = 0x3F0
-; X32-NEXT:    shll $6, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_2032_mask_shl_6:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1008, %eax # imm = 0x3F0
+; X86-NEXT:    shll $6, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_2032_mask_shl_6:
 ; X64:       # %bb.0:
@@ -1053,13 +1053,13 @@ define i16 @test_i16_2032_mask_shl_6(i16 %a0) {
 }
 
 define i16 @test_i16_65024_mask_shl_1(i16 %a0) {
-; X32-LABEL: test_i16_65024_mask_shl_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $32256, %eax # imm = 0x7E00
-; X32-NEXT:    addl %eax, %eax
-; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i16_65024_mask_shl_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $32256, %eax # imm = 0x7E00
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_65024_mask_shl_1:
 ; X64:       # %bb.0:
@@ -1080,12 +1080,12 @@ define i16 @test_i16_65024_mask_shl_1(i16 %a0) {
 ; lshr
 
 define i32 @test_i32_32767_mask_lshr_1(i32 %a0) {
-; X32-LABEL: test_i32_32767_mask_lshr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $32766, %eax # imm = 0x7FFE
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_32767_mask_lshr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $32766, %eax # imm = 0x7FFE
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_32767_mask_lshr_1:
 ; X64:       # %bb.0:
@@ -1099,12 +1099,12 @@ define i32 @test_i32_32767_mask_lshr_1(i32 %a0) {
 }
 
 define i32 @test_i32_8388352_mask_lshr_7(i32 %a0) {
-; X32-LABEL: test_i32_8388352_mask_lshr_7:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $7, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_8388352_mask_lshr_7:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $7, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_lshr_7:
 ; X64:       # %bb.0:
@@ -1117,12 +1117,12 @@ define i32 @test_i32_8388352_mask_lshr_7(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_8388352_mask_lshr_8(i32 %a0) {
-; X32-LABEL: test_i32_8388352_mask_lshr_8:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $8, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_8388352_mask_lshr_8:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $8, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_lshr_8:
 ; X64:       # %bb.0:
@@ -1135,12 +1135,12 @@ define i32 @test_i32_8388352_mask_lshr_8(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_8388352_mask_lshr_9(i32 %a0) {
-; X32-LABEL: test_i32_8388352_mask_lshr_9:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $8388096, %eax # imm = 0x7FFE00
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $9, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_8388352_mask_lshr_9:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $8388096, %eax # imm = 0x7FFE00
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $9, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_lshr_9:
 ; X64:       # %bb.0:
@@ -1153,12 +1153,12 @@ define i32 @test_i32_8388352_mask_lshr_9(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_8388352_mask_lshr_10(i32 %a0) {
-; X32-LABEL: test_i32_8388352_mask_lshr_10:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $8387584, %eax # imm = 0x7FFC00
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $10, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_8388352_mask_lshr_10:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $8387584, %eax # imm = 0x7FFC00
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $10, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_lshr_10:
 ; X64:       # %bb.0:
@@ -1172,12 +1172,12 @@ define i32 @test_i32_8388352_mask_lshr_10(i32 %a0) {
 }
 
 define i32 @test_i32_4294836224_mask_lshr_1(i32 %a0) {
-; X32-LABEL: test_i32_4294836224_mask_lshr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $-131072, %eax # imm = 0xFFFE0000
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_4294836224_mask_lshr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $-131072, %eax # imm = 0xFFFE0000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_4294836224_mask_lshr_1:
 ; X64:       # %bb.0:
@@ -1190,12 +1190,12 @@ define i32 @test_i32_4294836224_mask_lshr_1(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_4294836224_mask_lshr_16(i32 %a0) {
-; X32-LABEL: test_i32_4294836224_mask_lshr_16:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $-131072, %eax # imm = 0xFFFE0000
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $16, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_4294836224_mask_lshr_16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $-131072, %eax # imm = 0xFFFE0000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $16, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_4294836224_mask_lshr_16:
 ; X64:       # %bb.0:
@@ -1208,11 +1208,11 @@ define i32 @test_i32_4294836224_mask_lshr_16(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_4294836224_mask_lshr_17(i32 %a0) {
-; X32-LABEL: test_i32_4294836224_mask_lshr_17:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $17, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_4294836224_mask_lshr_17:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $17, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_4294836224_mask_lshr_17:
 ; X64:       # %bb.0:
@@ -1224,11 +1224,11 @@ define i32 @test_i32_4294836224_mask_lshr_17(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_4294836224_mask_lshr_18(i32 %a0) {
-; X32-LABEL: test_i32_4294836224_mask_lshr_18:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $18, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_4294836224_mask_lshr_18:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $18, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_4294836224_mask_lshr_18:
 ; X64:       # %bb.0:
@@ -1243,12 +1243,12 @@ define i32 @test_i32_4294836224_mask_lshr_18(i32 %a0) {
 ; ashr
 
 define i32 @test_i32_32767_mask_ashr_1(i32 %a0) {
-; X32-LABEL: test_i32_32767_mask_ashr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $32766, %eax # imm = 0x7FFE
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_32767_mask_ashr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $32766, %eax # imm = 0x7FFE
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_32767_mask_ashr_1:
 ; X64:       # %bb.0:
@@ -1262,12 +1262,12 @@ define i32 @test_i32_32767_mask_ashr_1(i32 %a0) {
 }
 
 define i32 @test_i32_8388352_mask_ashr_7(i32 %a0) {
-; X32-LABEL: test_i32_8388352_mask_ashr_7:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $7, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_8388352_mask_ashr_7:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $7, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_ashr_7:
 ; X64:       # %bb.0:
@@ -1280,12 +1280,12 @@ define i32 @test_i32_8388352_mask_ashr_7(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_8388352_mask_ashr_8(i32 %a0) {
-; X32-LABEL: test_i32_8388352_mask_ashr_8:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $8, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_8388352_mask_ashr_8:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $8, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_ashr_8:
 ; X64:       # %bb.0:
@@ -1298,12 +1298,12 @@ define i32 @test_i32_8388352_mask_ashr_8(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_8388352_mask_ashr_9(i32 %a0) {
-; X32-LABEL: test_i32_8388352_mask_ashr_9:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $8388096, %eax # imm = 0x7FFE00
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $9, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_8388352_mask_ashr_9:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $8388096, %eax # imm = 0x7FFE00
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $9, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_ashr_9:
 ; X64:       # %bb.0:
@@ -1316,12 +1316,12 @@ define i32 @test_i32_8388352_mask_ashr_9(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_8388352_mask_ashr_10(i32 %a0) {
-; X32-LABEL: test_i32_8388352_mask_ashr_10:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $8387584, %eax # imm = 0x7FFC00
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $10, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_8388352_mask_ashr_10:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $8387584, %eax # imm = 0x7FFC00
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $10, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_ashr_10:
 ; X64:       # %bb.0:
@@ -1335,12 +1335,12 @@ define i32 @test_i32_8388352_mask_ashr_10(i32 %a0) {
 }
 
 define i32 @test_i32_4294836224_mask_ashr_1(i32 %a0) {
-; X32-LABEL: test_i32_4294836224_mask_ashr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $-131072, %eax # imm = 0xFFFE0000
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    sarl %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_4294836224_mask_ashr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $-131072, %eax # imm = 0xFFFE0000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sarl %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_4294836224_mask_ashr_1:
 ; X64:       # %bb.0:
@@ -1353,12 +1353,12 @@ define i32 @test_i32_4294836224_mask_ashr_1(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_4294836224_mask_ashr_16(i32 %a0) {
-; X32-LABEL: test_i32_4294836224_mask_ashr_16:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $-131072, %eax # imm = 0xFFFE0000
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    sarl $16, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_4294836224_mask_ashr_16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $-131072, %eax # imm = 0xFFFE0000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sarl $16, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_4294836224_mask_ashr_16:
 ; X64:       # %bb.0:
@@ -1371,11 +1371,11 @@ define i32 @test_i32_4294836224_mask_ashr_16(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_4294836224_mask_ashr_17(i32 %a0) {
-; X32-LABEL: test_i32_4294836224_mask_ashr_17:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    sarl $17, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_4294836224_mask_ashr_17:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sarl $17, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_4294836224_mask_ashr_17:
 ; X64:       # %bb.0:
@@ -1387,11 +1387,11 @@ define i32 @test_i32_4294836224_mask_ashr_17(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_4294836224_mask_ashr_18(i32 %a0) {
-; X32-LABEL: test_i32_4294836224_mask_ashr_18:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    sarl $18, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_4294836224_mask_ashr_18:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sarl $18, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_4294836224_mask_ashr_18:
 ; X64:       # %bb.0:
@@ -1406,12 +1406,12 @@ define i32 @test_i32_4294836224_mask_ashr_18(i32 %a0) {
 ; shl
 
 define i32 @test_i32_32767_mask_shl_1(i32 %a0) {
-; X32-LABEL: test_i32_32767_mask_shl_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $32767, %eax # imm = 0x7FFF
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl %eax, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_32767_mask_shl_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_32767_mask_shl_1:
 ; X64:       # %bb.0:
@@ -1424,12 +1424,12 @@ define i32 @test_i32_32767_mask_shl_1(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_32767_mask_shl_16(i32 %a0) {
-; X32-LABEL: test_i32_32767_mask_shl_16:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $32767, %eax # imm = 0x7FFF
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $16, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_32767_mask_shl_16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $16, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_32767_mask_shl_16:
 ; X64:       # %bb.0:
@@ -1442,11 +1442,11 @@ define i32 @test_i32_32767_mask_shl_16(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_32767_mask_shl_17(i32 %a0) {
-; X32-LABEL: test_i32_32767_mask_shl_17:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $17, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_32767_mask_shl_17:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $17, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_32767_mask_shl_17:
 ; X64:       # %bb.0:
@@ -1458,11 +1458,11 @@ define i32 @test_i32_32767_mask_shl_17(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_32767_mask_shl_18(i32 %a0) {
-; X32-LABEL: test_i32_32767_mask_shl_18:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $18, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_32767_mask_shl_18:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $18, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_32767_mask_shl_18:
 ; X64:       # %bb.0:
@@ -1475,12 +1475,12 @@ define i32 @test_i32_32767_mask_shl_18(i32 %a0) {
 }
 
 define i32 @test_i32_8388352_mask_shl_7(i32 %a0) {
-; X32-LABEL: test_i32_8388352_mask_shl_7:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $7, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_8388352_mask_shl_7:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $7, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_shl_7:
 ; X64:       # %bb.0:
@@ -1493,12 +1493,12 @@ define i32 @test_i32_8388352_mask_shl_7(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_8388352_mask_shl_8(i32 %a0) {
-; X32-LABEL: test_i32_8388352_mask_shl_8:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $8, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_8388352_mask_shl_8:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $8, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_shl_8:
 ; X64:       # %bb.0:
@@ -1511,12 +1511,12 @@ define i32 @test_i32_8388352_mask_shl_8(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_8388352_mask_shl_9(i32 %a0) {
-; X32-LABEL: test_i32_8388352_mask_shl_9:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $9, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_8388352_mask_shl_9:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $8388352, %eax # imm = 0x7FFF00
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $9, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_shl_9:
 ; X64:       # %bb.0:
@@ -1529,12 +1529,12 @@ define i32 @test_i32_8388352_mask_shl_9(i32 %a0) {
   ret i32 %t1
 }
 define i32 @test_i32_8388352_mask_shl_10(i32 %a0) {
-; X32-LABEL: test_i32_8388352_mask_shl_10:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $4194048, %eax # imm = 0x3FFF00
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $10, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_8388352_mask_shl_10:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $4194048, %eax # imm = 0x3FFF00
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $10, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_shl_10:
 ; X64:       # %bb.0:
@@ -1548,12 +1548,12 @@ define i32 @test_i32_8388352_mask_shl_10(i32 %a0) {
 }
 
 define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) {
-; X32-LABEL: test_i32_4294836224_mask_shl_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $2147352576, %eax # imm = 0x7FFE0000
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl %eax, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i32_4294836224_mask_shl_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $2147352576, %eax # imm = 0x7FFE0000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_4294836224_mask_shl_1:
 ; X64:       # %bb.0:
@@ -1573,13 +1573,13 @@ define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) {
 ; lshr
 
 define i64 @test_i64_2147483647_mask_lshr_1(i64 %a0) {
-; X32-LABEL: test_i64_2147483647_mask_lshr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $2147483646, %eax # imm = 0x7FFFFFFE
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_2147483647_mask_lshr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $2147483646, %eax # imm = 0x7FFFFFFE
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_2147483647_mask_lshr_1:
 ; X64:       # %bb.0:
@@ -1593,14 +1593,14 @@ define i64 @test_i64_2147483647_mask_lshr_1(i64 %a0) {
 }
 
 define i64 @test_i64_140737488289792_mask_lshr_15(i64 %a0) {
-; X32-LABEL: test_i64_140737488289792_mask_lshr_15:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    shll $16, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shldl $17, %ecx, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_140737488289792_mask_lshr_15:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shll $16, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shldl $17, %ecx, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_lshr_15:
 ; X64:       # %bb.0:
@@ -1613,14 +1613,14 @@ define i64 @test_i64_140737488289792_mask_lshr_15(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_140737488289792_mask_lshr_16(i64 %a0) {
-; X32-LABEL: test_i64_140737488289792_mask_lshr_16:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $32767, %eax # imm = 0x7FFF
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shldl $16, %ecx, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_140737488289792_mask_lshr_16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shldl $16, %ecx, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_lshr_16:
 ; X64:       # %bb.0:
@@ -1633,14 +1633,14 @@ define i64 @test_i64_140737488289792_mask_lshr_16(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_140737488289792_mask_lshr_17(i64 %a0) {
-; X32-LABEL: test_i64_140737488289792_mask_lshr_17:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $32767, %eax # imm = 0x7FFF
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shldl $15, %ecx, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_140737488289792_mask_lshr_17:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shldl $15, %ecx, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_lshr_17:
 ; X64:       # %bb.0:
@@ -1653,14 +1653,14 @@ define i64 @test_i64_140737488289792_mask_lshr_17(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_140737488289792_mask_lshr_18(i64 %a0) {
-; X32-LABEL: test_i64_140737488289792_mask_lshr_18:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $32767, %eax # imm = 0x7FFF
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shldl $14, %ecx, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_140737488289792_mask_lshr_18:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shldl $14, %ecx, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_lshr_18:
 ; X64:       # %bb.0:
@@ -1674,12 +1674,12 @@ define i64 @test_i64_140737488289792_mask_lshr_18(i64 %a0) {
 }
 
 define i64 @test_i64_18446744065119617024_mask_lshr_1(i64 %a0) {
-; X32-LABEL: test_i64_18446744065119617024_mask_lshr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    shrl %edx
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_18446744065119617024_mask_lshr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shrl %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_18446744065119617024_mask_lshr_1:
 ; X64:       # %bb.0:
@@ -1692,12 +1692,12 @@ define i64 @test_i64_18446744065119617024_mask_lshr_1(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_18446744065119617024_mask_lshr_32(i64 %a0) {
-; X32-LABEL: test_i64_18446744065119617024_mask_lshr_32:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $-2, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_18446744065119617024_mask_lshr_32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $-2, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_18446744065119617024_mask_lshr_32:
 ; X64:       # %bb.0:
@@ -1710,12 +1710,12 @@ define i64 @test_i64_18446744065119617024_mask_lshr_32(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_18446744065119617024_mask_lshr_33(i64 %a0) {
-; X32-LABEL: test_i64_18446744065119617024_mask_lshr_33:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_18446744065119617024_mask_lshr_33:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_18446744065119617024_mask_lshr_33:
 ; X64:       # %bb.0:
@@ -1727,12 +1727,12 @@ define i64 @test_i64_18446744065119617024_mask_lshr_33(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_18446744065119617024_mask_lshr_34(i64 %a0) {
-; X32-LABEL: test_i64_18446744065119617024_mask_lshr_34:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl $2, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_18446744065119617024_mask_lshr_34:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_18446744065119617024_mask_lshr_34:
 ; X64:       # %bb.0:
@@ -1747,13 +1747,13 @@ define i64 @test_i64_18446744065119617024_mask_lshr_34(i64 %a0) {
 ; ashr
 
 define i64 @test_i64_2147483647_mask_ashr_1(i64 %a0) {
-; X32-LABEL: test_i64_2147483647_mask_ashr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $2147483646, %eax # imm = 0x7FFFFFFE
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shrl %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_2147483647_mask_ashr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $2147483646, %eax # imm = 0x7FFFFFFE
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_2147483647_mask_ashr_1:
 ; X64:       # %bb.0:
@@ -1767,14 +1767,14 @@ define i64 @test_i64_2147483647_mask_ashr_1(i64 %a0) {
 }
 
 define i64 @test_i64_140737488289792_mask_ashr_15(i64 %a0) {
-; X32-LABEL: test_i64_140737488289792_mask_ashr_15:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    shll $16, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shldl $17, %ecx, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_140737488289792_mask_ashr_15:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shll $16, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shldl $17, %ecx, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_ashr_15:
 ; X64:       # %bb.0:
@@ -1787,14 +1787,14 @@ define i64 @test_i64_140737488289792_mask_ashr_15(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_140737488289792_mask_ashr_16(i64 %a0) {
-; X32-LABEL: test_i64_140737488289792_mask_ashr_16:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $32767, %eax # imm = 0x7FFF
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shldl $16, %ecx, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_140737488289792_mask_ashr_16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shldl $16, %ecx, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_ashr_16:
 ; X64:       # %bb.0:
@@ -1807,14 +1807,14 @@ define i64 @test_i64_140737488289792_mask_ashr_16(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_140737488289792_mask_ashr_17(i64 %a0) {
-; X32-LABEL: test_i64_140737488289792_mask_ashr_17:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $32767, %eax # imm = 0x7FFF
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shldl $15, %ecx, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_140737488289792_mask_ashr_17:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shldl $15, %ecx, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_ashr_17:
 ; X64:       # %bb.0:
@@ -1827,14 +1827,14 @@ define i64 @test_i64_140737488289792_mask_ashr_17(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_140737488289792_mask_ashr_18(i64 %a0) {
-; X32-LABEL: test_i64_140737488289792_mask_ashr_18:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $32767, %eax # imm = 0x7FFF
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shldl $14, %ecx, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_140737488289792_mask_ashr_18:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shldl $14, %ecx, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_ashr_18:
 ; X64:       # %bb.0:
@@ -1848,12 +1848,12 @@ define i64 @test_i64_140737488289792_mask_ashr_18(i64 %a0) {
 }
 
 define i64 @test_i64_18446744065119617024_mask_ashr_1(i64 %a0) {
-; X32-LABEL: test_i64_18446744065119617024_mask_ashr_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    sarl %edx
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_18446744065119617024_mask_ashr_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sarl %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_18446744065119617024_mask_ashr_1:
 ; X64:       # %bb.0:
@@ -1866,13 +1866,13 @@ define i64 @test_i64_18446744065119617024_mask_ashr_1(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_18446744065119617024_mask_ashr_32(i64 %a0) {
-; X32-LABEL: test_i64_18446744065119617024_mask_ashr_32:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    andl $-2, %eax
-; X32-NEXT:    sarl $31, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_18446744065119617024_mask_ashr_32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    andl $-2, %eax
+; X86-NEXT:    sarl $31, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_18446744065119617024_mask_ashr_32:
 ; X64:       # %bb.0:
@@ -1885,13 +1885,13 @@ define i64 @test_i64_18446744065119617024_mask_ashr_32(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_18446744065119617024_mask_ashr_33(i64 %a0) {
-; X32-LABEL: test_i64_18446744065119617024_mask_ashr_33:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    sarl %eax
-; X32-NEXT:    sarl $31, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_18446744065119617024_mask_ashr_33:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    sarl %eax
+; X86-NEXT:    sarl $31, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_18446744065119617024_mask_ashr_33:
 ; X64:       # %bb.0:
@@ -1903,13 +1903,13 @@ define i64 @test_i64_18446744065119617024_mask_ashr_33(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_18446744065119617024_mask_ashr_34(i64 %a0) {
-; X32-LABEL: test_i64_18446744065119617024_mask_ashr_34:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    sarl $2, %eax
-; X32-NEXT:    sarl $31, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_18446744065119617024_mask_ashr_34:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    sarl $2, %eax
+; X86-NEXT:    sarl $31, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_18446744065119617024_mask_ashr_34:
 ; X64:       # %bb.0:
@@ -1924,12 +1924,12 @@ define i64 @test_i64_18446744065119617024_mask_ashr_34(i64 %a0) {
 ; shl
 
 define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) {
-; X32-LABEL: test_i64_2147483647_mask_shl_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_2147483647_mask_shl_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_2147483647_mask_shl_1:
 ; X64:       # %bb.0:
@@ -1941,12 +1941,12 @@ define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_2147483647_mask_shl_32(i64 %a0) {
-; X32-LABEL: test_i64_2147483647_mask_shl_32:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_2147483647_mask_shl_32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_2147483647_mask_shl_32:
 ; X64:       # %bb.0:
@@ -1959,12 +1959,12 @@ define i64 @test_i64_2147483647_mask_shl_32(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_2147483647_mask_shl_33(i64 %a0) {
-; X32-LABEL: test_i64_2147483647_mask_shl_33:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    addl %edx, %edx
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_2147483647_mask_shl_33:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    addl %edx, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_2147483647_mask_shl_33:
 ; X64:       # %bb.0:
@@ -1976,12 +1976,12 @@ define i64 @test_i64_2147483647_mask_shl_33(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) {
-; X32-LABEL: test_i64_2147483647_mask_shl_34:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    shll $2, %edx
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_2147483647_mask_shl_34:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shll $2, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_2147483647_mask_shl_34:
 ; X64:       # %bb.0:
@@ -1994,16 +1994,16 @@ define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) {
 }
 
 define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) {
-; X32-LABEL: test_i64_140737488289792_mask_shl_15:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shll $16, %ecx
-; X32-NEXT:    movl $32767, %edx # imm = 0x7FFF
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    shldl $15, %ecx, %edx
-; X32-NEXT:    shll $31, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_140737488289792_mask_shl_15:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll $16, %ecx
+; X86-NEXT:    movl $32767, %edx # imm = 0x7FFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shldl $15, %ecx, %edx
+; X86-NEXT:    shll $31, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_shl_15:
 ; X64:       # %bb.0:
@@ -2016,15 +2016,15 @@ define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) {
-; X32-LABEL: test_i64_140737488289792_mask_shl_16:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $16, %eax
-; X32-NEXT:    movl $32767, %edx # imm = 0x7FFF
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    shldl $16, %eax, %edx
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_140737488289792_mask_shl_16:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $16, %eax
+; X86-NEXT:    movl $32767, %edx # imm = 0x7FFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shldl $16, %eax, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_shl_16:
 ; X64:       # %bb.0:
@@ -2037,14 +2037,14 @@ define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) {
-; X32-LABEL: test_i64_140737488289792_mask_shl_17:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $16, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    shldl $17, %eax, %edx
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_140737488289792_mask_shl_17:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $16, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shldl $17, %eax, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_shl_17:
 ; X64:       # %bb.0:
@@ -2057,14 +2057,14 @@ define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) {
   ret i64 %t1
 }
 define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) {
-; X32-LABEL: test_i64_140737488289792_mask_shl_18:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $16, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    shldl $18, %eax, %edx
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_140737488289792_mask_shl_18:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $16, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shldl $18, %eax, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_shl_18:
 ; X64:       # %bb.0:
@@ -2078,13 +2078,13 @@ define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) {
 }
 
 define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) {
-; X32-LABEL: test_i64_18446744065119617024_mask_shl_1:
-; X32:       # %bb.0:
-; X32-NEXT:    movl $2147483646, %edx # imm = 0x7FFFFFFE
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    addl %edx, %edx
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    retl
+; X86-LABEL: test_i64_18446744065119617024_mask_shl_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $2147483646, %edx # imm = 0x7FFFFFFE
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    addl %edx, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_18446744065119617024_mask_shl_1:
 ; X64:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/float-conv-elim.ll b/llvm/test/CodeGen/X86/float-conv-elim.ll
index 4a651cf..2a543df 100644
--- a/llvm/test/CodeGen/X86/float-conv-elim.ll
+++ b/llvm/test/CodeGen/X86/float-conv-elim.ll
@@ -1,32 +1,137 @@
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-- -mcpu=x86-64 < %s | FileCheck %s
 
-; Make sure the float conversion is folded away as it should be.
-; CHECK-LABEL: foo
-; CHECK-NOT: cvt
-; CHECK: movzbl
-define i32 @foo(i8 %a) #0 {
+define i32 @u8_f32_s32(i8 %a) {
+; CHECK-LABEL: u8_f32_s32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    retq
   %conv = uitofp i8 %a to float
   %conv1 = fptosi float %conv to i32
   ret i32 %conv1
 }
 
-; CHECK-LABEL: foo2
-; CHECK-NOT: cvt
-; CHECK: movsbl
-define i32 @foo2(i8 %a) #0 {
+define i32 @s8_f32_s32(i8 %a) {
+; CHECK-LABEL: s8_f32_s32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movsbl %dil, %eax
+; CHECK-NEXT:    retq
   %conv = sitofp i8 %a to float
   %conv1 = fptosi float %conv to i32
   ret i32 %conv1
 }
 
-; CHECK-LABEL: bar
-; CHECK-NOT: cvt
-; CHECK: movl
-define zeroext i8 @bar(i8 zeroext %a) #0 {
+define zeroext i8 @u8_f32_u8(i8 zeroext %a) {
+; CHECK-LABEL: u8_f32_u8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
   %conv = uitofp i8 %a to float
   %conv1 = fptoui float %conv to i8
   ret i8 %conv1
 }
 
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+define i32 @s32_f32_s24_s32(i32 %a) {
+; CHECK-LABEL: s32_f32_s24_s32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shll $8, %eax
+; CHECK-NEXT:    sarl $8, %eax
+; CHECK-NEXT:    retq
+  %f = sitofp i32 %a to float
+  %i = fptosi float %f to i24
+  %r = sext i24 %i to i32
+  ret i32 %r
+}
+
+define i32 @s32_f32_u24_u32(i32 %a) {
+; CHECK-LABEL: s32_f32_u24_u32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $16777215, %eax # imm = 0xFFFFFF
+; CHECK-NEXT:    retq
+  %f = sitofp i32 %a to float
+  %i = fptoui float %f to i24
+  %r = zext i24 %i to i32
+  ret i32 %r
+}
+
+define i32 @u32_f32_s24_s32(i32 %a) {
+; CHECK-LABEL: u32_f32_s24_s32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shll $8, %eax
+; CHECK-NEXT:    sarl $8, %eax
+; CHECK-NEXT:    retq
+  %f = uitofp i32 %a to float
+  %i = fptosi float %f to i24
+  %r = sext i24 %i to i32
+  ret i32 %r
+}
+
+define i32 @u32_f32_u24_u32(i32 %a) {
+; CHECK-LABEL: u32_f32_u24_u32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $16777215, %eax # imm = 0xFFFFFF
+; CHECK-NEXT:    retq
+  %f = uitofp i32 %a to float
+  %i = fptoui float %f to i24
+  %r = zext i24 %i to i32
+  ret i32 %r
+}
+
+; This requires converting to FP and back.
 
+define i32 @s32_f32_s25_s32(i32 %a) {
+; CHECK-LABEL: s32_f32_s25_s32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cvtsi2ss %edi, %xmm0
+; CHECK-NEXT:    cvttss2si %xmm0, %eax
+; CHECK-NEXT:    retq
+  %f = sitofp i32 %a to float
+  %i = fptosi float %f to i25
+  %r = sext i25 %i to i32
+  ret i32 %r
+}
+
+define i32 @s32_f32_u25_u32(i32 %a) {
+; CHECK-LABEL: s32_f32_u25_u32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cvtsi2ss %edi, %xmm0
+; CHECK-NEXT:    cvttss2si %xmm0, %eax
+; CHECK-NEXT:    retq
+  %f = sitofp i32 %a to float
+  %i = fptoui float %f to i25
+  %r = zext i25 %i to i32
+  ret i32 %r
+}
+
+; TODO: This could avoid converting to FP.
+
+define i32 @u32_f32_s25_s32(i32 %a) {
+; CHECK-LABEL: u32_f32_s25_s32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cvtsi2ss %rax, %xmm0
+; CHECK-NEXT:    cvttss2si %xmm0, %eax
+; CHECK-NEXT:    retq
+  %f = uitofp i32 %a to float
+  %i = fptosi float %f to i25
+  %r = sext i25 %i to i32
+  ret i32 %r
+}
+
+define i32 @u32_f32_u25_u32(i32 %a) {
+; CHECK-LABEL: u32_f32_u25_u32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cvtsi2ss %rax, %xmm0
+; CHECK-NEXT:    cvttss2si %xmm0, %eax
+; CHECK-NEXT:    retq
+  %f = uitofp i32 %a to float
+  %i = fptoui float %f to i25
+  %r = zext i25 %i to i32
+  ret i32 %r
+}
diff --git a/llvm/test/CodeGen/X86/load-local-v3i129.ll b/llvm/test/CodeGen/X86/load-local-v3i129.ll
index b88e7a0..f339891 100644
--- a/llvm/test/CodeGen/X86/load-local-v3i129.ll
+++ b/llvm/test/CodeGen/X86/load-local-v3i129.ll
@@ -1,20 +1,39 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=FAST-SHLD
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefix=SLOW-SHLD
 
 define void @_start() {
-; CHECK-LABEL: _start:
-; CHECK:       # %bb.0: # %Entry
-; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT:    shrdq $2, %rcx, %rax
-; CHECK-NEXT:    shrq $2, %rcx
-; CHECK-NEXT:    leaq 1(,%rax,4), %rdx
-; CHECK-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    shrdq $62, %rcx, %rax
-; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    orq $-2, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq $-1, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    retq
+; FAST-SHLD-LABEL: _start:
+; FAST-SHLD:       # %bb.0: # %Entry
+; FAST-SHLD-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; FAST-SHLD-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
+; FAST-SHLD-NEXT:    shrdq $2, %rcx, %rax
+; FAST-SHLD-NEXT:    shrq $2, %rcx
+; FAST-SHLD-NEXT:    leaq 1(,%rax,4), %rdx
+; FAST-SHLD-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
+; FAST-SHLD-NEXT:    shrdq $62, %rcx, %rax
+; FAST-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; FAST-SHLD-NEXT:    orq $-2, -{{[0-9]+}}(%rsp)
+; FAST-SHLD-NEXT:    movq $-1, -{{[0-9]+}}(%rsp)
+; FAST-SHLD-NEXT:    retq
+;
+; SLOW-SHLD-LABEL: _start:
+; SLOW-SHLD:       # %bb.0: # %Entry
+; SLOW-SHLD-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; SLOW-SHLD-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
+; SLOW-SHLD-NEXT:    shrq $2, %rax
+; SLOW-SHLD-NEXT:    movq %rcx, %rdx
+; SLOW-SHLD-NEXT:    shlq $62, %rdx
+; SLOW-SHLD-NEXT:    orq %rax, %rdx
+; SLOW-SHLD-NEXT:    andq $-4, %rcx
+; SLOW-SHLD-NEXT:    leaq 1(,%rdx,4), %rax
+; SLOW-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; SLOW-SHLD-NEXT:    shrq $62, %rdx
+; SLOW-SHLD-NEXT:    orq %rcx, %rdx
+; SLOW-SHLD-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
+; SLOW-SHLD-NEXT:    orq $-2, -{{[0-9]+}}(%rsp)
+; SLOW-SHLD-NEXT:    movq $-1, -{{[0-9]+}}(%rsp)
+; SLOW-SHLD-NEXT:    retq
 Entry:
   %y = alloca <3 x i129>, align 4
   %L = load <3 x i129>, <3 x i129>* %y
diff --git a/llvm/test/CodeGen/X86/lsr-interesting-step.ll b/llvm/test/CodeGen/X86/lsr-interesting-step.ll
index c9096156..66afd38 100644
--- a/llvm/test/CodeGen/X86/lsr-interesting-step.ll
+++ b/llvm/test/CodeGen/X86/lsr-interesting-step.ll
@@ -1,17 +1,35 @@
-; RUN: llc < %s -relocation-model=static -mtriple=x86_64-unknown-linux-gnu -asm-verbose=0 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -relocation-model=static -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 
 ; The inner loop should require only one add (and no leas either).
 ; rdar://8100380
 
-; CHECK:      BB0_2:
-; CHECK-NEXT:   movb    $0, flags(%rcx)
-; CHECK-NEXT:   addq    %rax, %rcx
-; CHECK-NEXT:   cmpq    $8192, %rcx
-; CHECK-NEXT:   jl
-
 @flags = external dso_local global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1]
 
 define void @foo() nounwind {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl $2, %eax
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_1: # %bb7
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB0_2 Depth 2
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_2: # %bb11
+; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    movb $0, flags(%rcx)
+; CHECK-NEXT:    addq %rax, %rcx
+; CHECK-NEXT:    cmpq $8192, %rcx # imm = 0x2000
+; CHECK-NEXT:    jl .LBB0_2
+; CHECK-NEXT:  # %bb.3: # %bb16
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    incq %rax
+; CHECK-NEXT:    cmpq $8192, %rax # imm = 0x2000
+; CHECK-NEXT:    jl .LBB0_1
+; CHECK-NEXT:  # %bb.4: # %bb20
+; CHECK-NEXT:    retq
 entry:
   br label %bb
 
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 779f9dc..5a7f572 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -4539,6 +4539,8 @@ define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) {
   ret i1 %val
 }
 
+; TODO: We expect similar result as for PR39665_c_ray_opt,
+; but this is not the case in practice.
 define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
 ; SSE-LABEL: PR39665_c_ray:
 ; SSE:       # %bb.0:
@@ -4597,3 +4599,55 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
   %r = select i1 %u, i32 42, i32 99
   ret i32 %r
 }
+
+define i32 @PR39665_c_ray_opt(<2 x double> %x, <2 x double> %y) {
+; SSE-LABEL: PR39665_c_ray_opt:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cmpltpd %xmm0, %xmm1
+; SSE-NEXT:    movmskpd %xmm1, %eax
+; SSE-NEXT:    cmpb $3, %al
+; SSE-NEXT:    movl $42, %ecx
+; SSE-NEXT:    movl $99, %eax
+; SSE-NEXT:    cmovel %ecx, %eax
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: PR39665_c_ray_opt:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT:    vmovmskpd %xmm0, %eax
+; AVX1OR2-NEXT:    cmpb $3, %al
+; AVX1OR2-NEXT:    movl $42, %ecx
+; AVX1OR2-NEXT:    movl $99, %eax
+; AVX1OR2-NEXT:    cmovel %ecx, %eax
+; AVX1OR2-NEXT:    retq
+;
+; KNL-LABEL: PR39665_c_ray_opt:
+; KNL:       # %bb.0:
+; KNL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
+; KNL-NEXT:    knotw %k0, %k0
+; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    testb $3, %al
+; KNL-NEXT:    movl $42, %ecx
+; KNL-NEXT:    movl $99, %eax
+; KNL-NEXT:    cmovel %ecx, %eax
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: PR39665_c_ray_opt:
+; SKX:       # %bb.0:
+; SKX-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
+; SKX-NEXT:    kmovd %k0, %eax
+; SKX-NEXT:    cmpb $3, %al
+; SKX-NEXT:    movl $42, %ecx
+; SKX-NEXT:    movl $99, %eax
+; SKX-NEXT:    cmovel %ecx, %eax
+; SKX-NEXT:    retq
+  %cmp = fcmp ogt <2 x double> %x, %y
+  %shift = shufflevector <2 x i1> %cmp, <2 x i1> poison, <2 x i32> <i32 1, i32 undef>
+  %1 = and <2 x i1> %cmp, %shift
+  %u = extractelement <2 x i1> %1, i64 0
+  %r = select i1 %u, i32 42, i32 99
+  ret i32 %r
+}
diff --git a/llvm/test/CodeGen/X86/sink-out-of-loop.ll b/llvm/test/CodeGen/X86/sink-out-of-loop.ll
index e7b721d..e3443b9 100644
--- a/llvm/test/CodeGen/X86/sink-out-of-loop.ll
+++ b/llvm/test/CodeGen/X86/sink-out-of-loop.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
 
 ; A MOV32ri is inside a loop, it has two successors, one successor is inside the
@@ -5,15 +6,48 @@
 ; MOV32ri outside the loop.
 ; rdar://11980766
 define i32 @sink_succ(i32 %argc, i8** nocapture %argv) nounwind uwtable ssp {
-; CHECK-LABEL: sink_succ
-; CHECK: [[OUTER_LN1:LBB0_[0-9]+]]: ## %preheader
-; CHECK: %exit
-; CHECK-NOT: movl
-; CHECK: jne [[OUTER_LN1]]
-; CHECK: movl
-; CHECK: [[LN2:LBB0_[0-9]+]]: ## %for.body2
-; CHECK: jne [[LN2]]
-; CHECK: ret
+; CHECK-LABEL: sink_succ:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB0_1: ## %preheader
+; CHECK-NEXT:    ## =>This Loop Header: Depth=1
+; CHECK-NEXT:    ## Child Loop BB0_2 Depth 2
+; CHECK-NEXT:    ## Child Loop BB0_3 Depth 3
+; CHECK-NEXT:    movl $1, %ecx
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB0_2: ## %for.body1.lr
+; CHECK-NEXT:    ## Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    ## => This Loop Header: Depth=2
+; CHECK-NEXT:    ## Child Loop BB0_3 Depth 3
+; CHECK-NEXT:    movl %ecx, %edx
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB0_3: ## %for.body1
+; CHECK-NEXT:    ## Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    ## Parent Loop BB0_2 Depth=2
+; CHECK-NEXT:    ## => This Inner Loop Header: Depth=3
+; CHECK-NEXT:    decl %edx
+; CHECK-NEXT:    jne LBB0_3
+; CHECK-NEXT:  ## %bb.4: ## %for.inc40.i
+; CHECK-NEXT:    ## in Loop: Header=BB0_2 Depth=2
+; CHECK-NEXT:    incl %ecx
+; CHECK-NEXT:    cmpl $32, %ecx
+; CHECK-NEXT:    jne LBB0_2
+; CHECK-NEXT:  ## %bb.5: ## %exit
+; CHECK-NEXT:    ## in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    incl %eax
+; CHECK-NEXT:    cmpl $10, %eax
+; CHECK-NEXT:    jne LBB0_1
+; CHECK-NEXT:  ## %bb.6: ## %for.body2.preheader
+; CHECK-NEXT:    movl $2048, %eax ## imm = 0x800
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB0_7: ## %for.body2
+; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    decl %eax
+; CHECK-NEXT:    jne LBB0_7
+; CHECK-NEXT:  ## %bb.8: ## %for.end20
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    retq
 entry:
   br label %preheader
 
@@ -55,6 +89,19 @@ for.end20:
 
 define i32 @sink_out_of_loop(i32 %n, i32* %output) {
 ; CHECK-LABEL: sink_out_of_loop:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB1_1: ## %loop
+; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    movl %ecx, (%rsi,%rcx,4)
+; CHECK-NEXT:    incq %rcx
+; CHECK-NEXT:    cmpl %edi, %ecx
+; CHECK-NEXT:    jl LBB1_1
+; CHECK-NEXT:  ## %bb.2: ## %exit
+; CHECK-NEXT:    imull %eax, %eax
+; CHECK-NEXT:    retq
 entry:
   br label %loop
 
@@ -68,8 +115,5 @@ loop:
   br i1 %exit_cond, label %exit, label %loop
 
 exit:
-; CHECK: %bb.2
-; CHECK: imull %eax, %eax
-; CHECK: retq
   ret i32 %j
 }
diff --git a/llvm/test/DebugInfo/X86/dbg-value-funcarg.ll b/llvm/test/DebugInfo/X86/dbg-value-funcarg.ll
index 4818fbf..f1edf09 100644
--- a/llvm/test/DebugInfo/X86/dbg-value-funcarg.ll
+++ b/llvm/test/DebugInfo/X86/dbg-value-funcarg.ll
@@ -122,14 +122,13 @@ define dso_local void @foo_same_param(i32 %t3a) local_unnamed_addr #0 !dbg !31 {
 ; CHECK: DBG_VALUE %0, $noreg, ![[T3A]], !DIExpression(),
 ; CHECK: TCRETURNdi64 @bar,
 ; INSTRREF-LABEL: name:            foo_same_param
-; INSTRREF: DBG_PHI $edi, 2
 ; INSTRREF: DBG_PHI $edi, 1
 ; INSTRREF: DBG_VALUE $edi, $noreg, ![[T3A]], !DIExpression(),
 ; INSTRREF: CALL64pcrel32 @bar,
 ; INSTRREF: DBG_INSTR_REF 1, 0, ![[TMP]], !DIExpression(),
 ; INSTRREF: DBG_VALUE 123, $noreg, ![[T3A]], !DIExpression(),
 ; INSTRREF: CALL64pcrel32 @bar,
-; INSTRREF: DBG_INSTR_REF 2, 0, ![[T3A]], !DIExpression(),
+; INSTRREF: DBG_INSTR_REF 1, 0, ![[T3A]], !DIExpression(),
 ; INSTRREF: TCRETURNdi64 @bar,
 entry:
   call void @llvm.dbg.value(metadata i32 %t3a, metadata !33, metadata !DIExpression()), !dbg !35
diff --git a/llvm/test/DebugInfo/X86/dbg-value-funcarg4.ll b/llvm/test/DebugInfo/X86/dbg-value-funcarg4.ll
new file mode 100644
index 0000000..df52a39
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/dbg-value-funcarg4.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before=finalize-isel -o - %s -experimental-debug-variable-locations=true | FileCheck %s --implicit-check-not=DBG_PHI
+
+; Test that for multiple dbg.values referring to the same argument, we emit a
+; single DBG_PHI and refer to it twice. (Using more than one DBG_PHI is fine,
+; but inefficient).
+
+; CHECK-DAG: ![[LOCAL:.*]] = !DILocalVariable(name: "local"
+; CHECK-DAG: ![[LOCAL2:.*]] = !DILocalVariable(name: "local2"
+
+; CHECK: DBG_PHI $edi, 1
+
+; CHECK: DBG_INSTR_REF 1, 0, ![[LOCAL]], !DIExpression(),
+; CHECK: DBG_INSTR_REF 1, 0, ![[LOCAL2]], !DIExpression(),
+
+declare void @bar(i32)
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+define dso_local void @foo_local(i32 %t1a) local_unnamed_addr !dbg !7 {
+entry:
+  tail call void @bar(i32 %t1a) #3, !dbg !17
+  %bees = add i32 %t1a, 3
+  call void @llvm.dbg.value(metadata i32 %t1a, metadata !13, metadata !DIExpression()), !dbg !14
+  call void @llvm.dbg.value(metadata i32 %t1a, metadata !19, metadata !DIExpression()), !dbg !14
+  tail call void @bar(i32 %bees) #3, !dbg !17
+  ret void, !dbg !18
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "foo.c", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang"}
+!7 = distinct !DISubprogram(name: "foo_local", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !{!12, !13}
+!12 = !DILocalVariable(name: "t1a", arg: 1, scope: !7, file: !1, line: 3, type: !10)
+!13 = !DILocalVariable(name: "local", scope: !7, file: !1, line: 4, type: !10)
+!14 = !DILocation(line: 3, column: 20, scope: !7)
+!15 = !DILocation(line: 4, column: 7, scope: !7)
+!16 = !DILocation(line: 5, column: 3, scope: !7)
+!17 = !DILocation(line: 7, column: 3, scope: !7)
+!18 = !DILocation(line: 8, column: 1, scope: !7)
+!19 = !DILocalVariable(name: "local2", scope: !7, file: !1, line: 4, type: !10)
diff --git a/llvm/test/DebugInfo/X86/instr-ref-selectiondag.ll b/llvm/test/DebugInfo/X86/instr-ref-selectiondag.ll
index 465d496..34eab5e 100644
--- a/llvm/test/DebugInfo/X86/instr-ref-selectiondag.ll
+++ b/llvm/test/DebugInfo/X86/instr-ref-selectiondag.ll
@@ -237,12 +237,11 @@ shoes:
 
 ; FASTISEL-INSTRREF-LABEL: name: qux
 
-; FASTISEL-INSTRREF:      DBG_PHI $rdi, 2
-; FASTISEL-INSTRREF-NEXT: DBG_PHI $rdi, 1
+; FASTISEL-INSTRREF:      DBG_PHI $rdi, 1
 ; FASTISEL-INSTRREF:      DBG_INSTR_REF 1, 0, ![[SOCKS]], !DIExpression(DW_OP_deref),
 
 ; FASTISEL-INSTRREF-LABEL: bb.1.lala:
-; FASTISEL-INSTRREF:      DBG_INSTR_REF 2, 0, ![[KNEES]], !DIExpression(DW_OP_deref),
+; FASTISEL-INSTRREF:      DBG_INSTR_REF 1, 0, ![[KNEES]], !DIExpression(DW_OP_deref),
 declare i64 @cheddar(i32 *%arg)
 
 define void @qux(i32* noalias sret(i32) %agg.result) !dbg !40 {
diff --git a/llvm/test/Instrumentation/HeapProfiler/no-instrumentation.ll b/llvm/test/Instrumentation/HeapProfiler/no-instrumentation.ll
new file mode 100644
index 0000000..c05c013
--- /dev/null
+++ b/llvm/test/Instrumentation/HeapProfiler/no-instrumentation.ll
@@ -0,0 +1,23 @@
+;; Test that we don't add any instrumentation code to functions without
+;; interesting memory accesses.
+;
+; RUN: opt < %s -passes='function(memprof),module(memprof-module)' -S -debug 2>&1 | FileCheck %s
+
+;; Require asserts for -debug
+; REQUIRES: asserts
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @_Z3foov() {
+entry:
+  ret void
+}
+
+;; Confirm we ran memprof and decided not to instrument
+; CHECK: MEMPROF done instrumenting: 0 define void @_Z3foov
+
+;; We should not add any instrumentation related code
+; CHECK: define void @_Z3foov
+; CHECK-NEXT: entry:
+; CHECK-NEXT:  ret void
diff --git a/llvm/test/Instrumentation/HeapProfiler/skip-compiler-inserted.ll b/llvm/test/Instrumentation/HeapProfiler/skip-compiler-inserted.ll
new file mode 100644
index 0000000..716974d
--- /dev/null
+++ b/llvm/test/Instrumentation/HeapProfiler/skip-compiler-inserted.ll
@@ -0,0 +1,47 @@
+;; Test that we don't instrument loads to PGO counters or other
+;; compiler inserted variables.
+;
+; RUN: opt < %s -passes='function(memprof),module(memprof-module)' -S | FileCheck --check-prefixes=CHECK %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+$__profc__Z3foov = comdat nodeduplicate
+@__profc__Z3foov = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8
+@__llvm_gcov_ctr = internal global [1 x i64] zeroinitializer
+
+define void @_Z3foov(i32* %a) {
+entry:
+  ;; Load that should get instrumentation.
+  %tmp1 = load i32, i32* %a, align 4
+  ;; PGO counter update
+  %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3foov, i64 0, i64 0), align 8
+  %0 = add i64 %pgocount, 1
+  store i64 %0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3foov, i64 0, i64 0), align 8
+  ;; Gcov counter update
+  %gcovcount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_gcov_ctr, i64 0, i64 0), align 8
+  %1 = add i64 %gcovcount, 1
+  store i64 %1, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_gcov_ctr, i64 0, i64 0), align 8
+  ret void
+}
+
+;; We should only add memory profile instrumentation for the first load.
+; CHECK: define void @_Z3foov
+; CHECK-NEXT: entry:
+; CHECK-NEXT:  %0 = load i64, i64* @__memprof_shadow_memory_dynamic_address, align 8
+; CHECK-NEXT:  %1 = ptrtoint i32* %a to i64
+; CHECK-NEXT:  %2 = and i64 %1, -64
+; CHECK-NEXT:  %3 = lshr i64 %2, 3
+; CHECK-NEXT:  %4 = add i64 %3, %0
+; CHECK-NEXT:  %5 = inttoptr i64 %4 to i64*
+; CHECK-NEXT:  %6 = load i64, i64* %5, align 8
+; CHECK-NEXT:  %7 = add i64 %6, 1
+; CHECK-NEXT:  store i64 %7, i64* %5, align 8
+; CHECK-NEXT:  %tmp1 = load i32, i32* %a, align 4
+; CHECK-NEXT:  %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3foov, i64 0, i64 0)
+; CHECK-NEXT:  %8 = add i64 %pgocount, 1
+; CHECK-NEXT:  store i64 %8, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3foov, i64 0, i64 0)
+; CHECK-NEXT:  %gcovcount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_gcov_ctr, i64 0, i64 0)
+; CHECK-NEXT:  %9 = add i64 %gcovcount, 1
+; CHECK-NEXT:  store i64 %9, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_gcov_ctr, i64 0, i64 0)
+; CHECK-NEXT:  ret void
diff --git a/llvm/test/MC/AArch64/armv8.2a-dotprod.s b/llvm/test/MC/AArch64/armv8.2a-dotprod.s
index c561fbc..45dc8d0 100644
--- a/llvm/test/MC/AArch64/armv8.2a-dotprod.s
+++ b/llvm/test/MC/AArch64/armv8.2a-dotprod.s
@@ -13,6 +13,7 @@
 // RUN: llvm-mc -triple aarch64 -mcpu=tsv110 -show-encoding < %s | FileCheck %s --check-prefix=CHECK-DOTPROD
 // RUN: llvm-mc -triple aarch64 -mcpu=cortex-r82 -show-encoding < %s | FileCheck %s --check-prefix=CHECK-DOTPROD
 // RUN: llvm-mc -triple aarch64 -mattr=+v8r -show-encoding < %s | FileCheck %s --check-prefix=CHECK-DOTPROD
+// RUN: llvm-mc -triple aarch64 -mcpu=ampere1 -show-encoding < %s | FileCheck %s --check-prefix=CHECK-DOTPROD
 
 // RUN: not llvm-mc -triple aarch64 -mattr=+v8.2a -show-encoding < %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
@@ -36,6 +37,8 @@
 // RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
 // RUN: not llvm-mc -triple aarch64 -mcpu=neoverse-n2 -mattr=-dotprod -show-encoding < %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
+// RUN: not llvm-mc -triple aarch64 -mcpu=ampere1 -mattr=-dotprod -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
 
 udot v0.2s, v1.8b, v2.8b
 sdot v0.2s, v1.8b, v2.8b
diff --git a/llvm/test/MC/AArch64/armv8.3a-rcpc.s b/llvm/test/MC/AArch64/armv8.3a-rcpc.s
index 1660cc4..271eea5 100644
--- a/llvm/test/MC/AArch64/armv8.3a-rcpc.s
+++ b/llvm/test/MC/AArch64/armv8.3a-rcpc.s
@@ -6,6 +6,7 @@
 // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mcpu=neoverse-e1 < %s 2>&1 | FileCheck %s
 // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mcpu=neoverse-n1 < %s 2>&1 | FileCheck %s
 // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mcpu=neoverse-n2 < %s 2>&1 | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mcpu=ampere1 < %s 2>&1 | FileCheck %s
 // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a -mattr=+rcpc < %s 2>&1 | FileCheck %s
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.2a < %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-REQ %s < %t
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt b/llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
index c6d7c79..d7d246b 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
@@ -12,6 +12,7 @@
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-e1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n2 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=ampere1 --disassemble < %s | FileCheck %s
 
 # CHECK: ldaprb w0, [x0]
 # CHECK: ldaprh w0, [x0]
diff --git a/llvm/test/MC/SystemZ/insn-bad.s b/llvm/test/MC/SystemZ/insn-bad.s
index dbe5b71..e616026 100644
--- a/llvm/test/MC/SystemZ/insn-bad.s
+++ b/llvm/test/MC/SystemZ/insn-bad.s
@@ -512,6 +512,8 @@
 #CHECK: error: offset out of range
 #CHECK: brasl	%r0, -0x1000000002
 #CHECK: error: offset out of range
+#CHECK: brasl	%r0, .-0x1000000002
+#CHECK: error: offset out of range
 #CHECK: brasl	%r0, -1
 #CHECK: error: offset out of range
 #CHECK: brasl	%r0, 1
@@ -520,6 +522,8 @@
 #CHECK: error: offset out of range
 #CHECK: jasl	%r0, -0x1000000002
 #CHECK: error: offset out of range
+#CHECK: jasl	%r0, .-0x1000000002
+#CHECK: error: offset out of range
 #CHECK: jasl	%r0, -1
 #CHECK: error: offset out of range
 #CHECK: jasl	%r0, 1
@@ -527,10 +531,12 @@
 #CHECK: jasl	%r0, 0x100000000
 
 	brasl	%r0, -0x1000000002
+	brasl	%r0, .-0x1000000002
 	brasl	%r0, -1
 	brasl	%r0, 1
 	brasl	%r0, 0x100000000
 	jasl	%r0, -0x1000000002
+	jasl	%r0, .-0x1000000002
 	jasl	%r0, -1
 	jasl	%r0, 1
 	jasl	%r0, 0x100000000
diff --git a/llvm/test/MC/SystemZ/insn-good.s b/llvm/test/MC/SystemZ/insn-good.s
index ce610b5..e7f73c7 100644
--- a/llvm/test/MC/SystemZ/insn-good.s
+++ b/llvm/test/MC/SystemZ/insn-good.s
@@ -1263,6 +1263,12 @@
 #CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
 	brasl	%r0, -0x100000000
 	jasl	%r0, -0x100000000
+#CHECK: brasl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc0,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc0,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	brasl	%r0, .-0x100000000
+	jasl	%r0, .-0x100000000
 #CHECK: brasl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc0,0x05,A,A,A,A]
 #CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
 #CHECK: brasl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc0,0x05,A,A,A,A]
diff --git a/llvm/test/TableGen/VarLenDecoder.td b/llvm/test/TableGen/VarLenDecoder.td
new file mode 100644
index 0000000..657ca6a
--- /dev/null
+++ b/llvm/test/TableGen/VarLenDecoder.td
@@ -0,0 +1,87 @@
+// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+
+include "llvm/Target/Target.td"
+
+def ArchInstrInfo : InstrInfo { }
+
+def Arch : Target {
+  let InstructionSet = ArchInstrInfo;
+}
+
+def Reg : Register<"reg">;
+
+def RegClass : RegisterClass<"foo", [i64], 0, (add Reg)>;
+
+def GR64 : RegisterOperand<RegClass>;
+
+class MyMemOperand<dag sub_ops> : Operand<iPTR> {
+  let MIOperandInfo = sub_ops;
+  dag Base;
+  dag Extension;
+}
+
+def MemOp16: MyMemOperand<(ops GR64:$reg, i16imm:$offset)>;
+
+def MemOp32: MyMemOperand<(ops GR64:$reg, i32imm:$offset)>;
+
+class MyVarInst<MyMemOperand memory_op> : Instruction {
+  dag Inst;
+
+  let OutOperandList = (outs GR64:$dst);
+  let InOperandList  = (ins memory_op:$src);
+}
+
+def FOO16 : MyVarInst<MemOp16> {
+  let Inst = (ascend
+      (descend (operand "$dst", 3), 0b01000, (operand "$src.reg", 3)),
+      (slice "$src.offset", 15, 0)
+  );
+}
+def FOO32 : MyVarInst<MemOp32> {
+  let Inst = (ascend
+      (descend (operand "$dst", 3), 0b01001, (operand "$src.reg", 3)),
+      (slice "$src.offset", 31, 16),
+      (slice "$src.offset", 15, 0)
+  );
+}
+
+// CHECK:      MCD::OPC_ExtractField, 3, 5,  // Inst{7-3} ...
+// CHECK-NEXT: MCD::OPC_FilterValue, 8, 4, 0, 0, // Skip to: 12
+// CHECK-NEXT: MCD::OPC_Decode, 244, 1, 0, // Opcode: FOO16
+// CHECK-NEXT: MCD::OPC_FilterValue, 9, 4, 0, 0, // Skip to: 21
+// CHECK-NEXT: MCD::OPC_Decode, 245, 1, 1, // Opcode: FOO32
+// CHECK-NEXT: MCD::OPC_Fail,
+
+// Instruction length table
+// CHECK: 27,
+// CHECK-NEXT: 43,
+// CHECK-NEXT: };
+
+// CHECK:      case 0:
+// CHECK-NEXT: tmp = fieldFromInstruction(insn, 8, 3);
+// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; }
+// CHECK-NEXT: tmp = fieldFromInstruction(insn, 0, 3);
+// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; }
+// CHECK-NEXT: tmp = fieldFromInstruction(insn, 11, 16);
+// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
+// CHECK-NEXT: return S;
+// CHECK-NEXT: case 1:
+// CHECK-NEXT: tmp = fieldFromInstruction(insn, 8, 3);
+// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; }
+// CHECK-NEXT: tmp = fieldFromInstruction(insn, 0, 3);
+// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; }
+// CHECK-NEXT: tmp = 0x0;
+// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 11, 16), 16, 16);
+// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 27, 16), 0, 16);
+// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
+// CHECK-NEXT: return S;
+
+// CHECK-LABEL: case MCD::OPC_ExtractField: {
+// CHECK: makeUp(insn, Start + Len);
+
+// CHECK-LABEL: case MCD::OPC_CheckField: {
+// CHECK: makeUp(insn, Start + Len);
+
+// CHECK-LABEL: case MCD::OPC_Decode: {
+// CHECK: Len = InstrLenTable[Opc];
+// CHECK-NEXT: makeUp(insn, Len);
diff --git a/llvm/test/Transforms/ArgumentPromotion/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/min-legal-vector-width.ll
new file mode 100644
index 0000000..e7a18929
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/min-legal-vector-width.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
+
+; CHECK-LABEL: define i32 @foo() #0 {
+; CHECK-NEXT:      %.val = load <32 x half>, <32 x half>* undef, align 4
+; CHECK-NEXT:      call void @bar(<32 x half> %.val)
+; CHECK-NEXT:      ret i32 0
+; CHECK-NEXT:    }
+
+; CHECK-LABEL: define internal void @bar(<32 x half> %.0.val) #0 {
+; CHECK-NEXT:      ret void
+; CHECK-NEXT:    }
+
+; CHECK:    attributes #0 = { uwtable "min-legal-vector-width"="512" }
+
+define i32 @foo() #0 {
+  call void @bar(<32 x half>* undef)
+  ret i32 0
+}
+
+define internal void @bar(<32 x half>*) #0 {
+  %2 = load <32 x half>, <32 x half>* %0, align 4
+  ret void
+}
+
+attributes #0 = { uwtable "min-legal-vector-width"="0" }
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
index 718ae4a..2b2b113 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -57,9 +57,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>*
 ; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6:[0-9]+]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7:[0-9]+]]
+; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT:    ret void
@@ -84,9 +84,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>*
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT:    ret void
@@ -151,9 +151,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
+; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT:    ret void
@@ -178,9 +178,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
+; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT:    ret void
@@ -245,9 +245,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
+; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT:    ret void
@@ -272,9 +272,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
+; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT:    ret void
@@ -339,9 +339,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>*
 ; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
+; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT:    ret void
@@ -366,9 +366,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>*
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
+; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT:    ret void
@@ -431,8 +431,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
-; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT:    ret void
@@ -457,8 +457,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
-; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT:    ret void
@@ -521,8 +521,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>*
 ; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
-; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT:    ret void
@@ -547,8 +547,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>*
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
-; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT:    ret void
@@ -608,14 +608,14 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar
 ;
 ; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; IS__TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
+; IS__TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR3]] {
 ; IS__TUNIT_NPM-NEXT:  bb:
 ; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
+; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT:    ret void
@@ -635,14 +635,14 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar
 ;
 ; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; IS__CGSCC_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
+; IS__CGSCC_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR3]] {
 ; IS__CGSCC_NPM-NEXT:  bb:
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
+; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT:    ret void
@@ -671,7 +671,7 @@ define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_p
 ;
 ; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
 ; IS________NPM-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] {
+; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] {
 ; IS________NPM-NEXT:  bb:
 ; IS________NPM-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
 ; IS________NPM-NEXT:    store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
@@ -707,9 +707,9 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar
 ; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; IS__TUNIT_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
+; IS__TUNIT_NPM-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
 ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT:    ret void
@@ -734,9 +734,9 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; IS__CGSCC_NPM-NEXT:    call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
+; IS__CGSCC_NPM-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
 ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT:    ret void
@@ -762,21 +762,37 @@ attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2
 attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
 attributes #5 = { argmemonly nounwind }
 ;.
-; IS__TUNIT____: attributes #[[ATTR0:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }
-; IS__TUNIT____: attributes #[[ATTR1:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
-; IS__TUNIT____: attributes #[[ATTR2:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
-; IS__TUNIT____: attributes #[[ATTR3:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" }
-; IS__TUNIT____: attributes #[[ATTR4:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx2" }
-; IS__TUNIT____: attributes #[[ATTR5:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly }
-; IS__TUNIT____: attributes #[[ATTR6:[0-9]+]] = { willreturn writeonly }
-; IS__TUNIT____: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind willreturn }
+; IS__TUNIT_OPM: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }
+; IS__TUNIT_OPM: attributes #[[ATTR1]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
+; IS__TUNIT_OPM: attributes #[[ATTR2]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
+; IS__TUNIT_OPM: attributes #[[ATTR3]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" }
+; IS__TUNIT_OPM: attributes #[[ATTR4]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx2" }
+; IS__TUNIT_OPM: attributes #[[ATTR5:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly }
+; IS__TUNIT_OPM: attributes #[[ATTR6]] = { willreturn writeonly }
+; IS__TUNIT_OPM: attributes #[[ATTR7]] = { nofree nosync nounwind willreturn }
+;.
+; IS__TUNIT_NPM: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }
+; IS__TUNIT_NPM: attributes #[[ATTR1]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
+; IS__TUNIT_NPM: attributes #[[ATTR2]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
+; IS__TUNIT_NPM: attributes #[[ATTR3]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" }
+; IS__TUNIT_NPM: attributes #[[ATTR4:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly }
+; IS__TUNIT_NPM: attributes #[[ATTR5]] = { willreturn writeonly }
+; IS__TUNIT_NPM: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn }
+;.
+; IS__CGSCC_OPM: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }
+; IS__CGSCC_OPM: attributes #[[ATTR1]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
+; IS__CGSCC_OPM: attributes #[[ATTR2]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
+; IS__CGSCC_OPM: attributes #[[ATTR3]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" }
+; IS__CGSCC_OPM: attributes #[[ATTR4]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx2" }
+; IS__CGSCC_OPM: attributes #[[ATTR5:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly }
+; IS__CGSCC_OPM: attributes #[[ATTR6]] = { willreturn writeonly }
+; IS__CGSCC_OPM: attributes #[[ATTR7]] = { nounwind willreturn }
 ;.
-; IS__CGSCC____: attributes #[[ATTR0:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }
-; IS__CGSCC____: attributes #[[ATTR1:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
-; IS__CGSCC____: attributes #[[ATTR2:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
-; IS__CGSCC____: attributes #[[ATTR3:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" }
-; IS__CGSCC____: attributes #[[ATTR4:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx2" }
-; IS__CGSCC____: attributes #[[ATTR5:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly }
-; IS__CGSCC____: attributes #[[ATTR6:[0-9]+]] = { willreturn writeonly }
-; IS__CGSCC____: attributes #[[ATTR7:[0-9]+]] = { nounwind willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }
+; IS__CGSCC_NPM: attributes #[[ATTR1]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
+; IS__CGSCC_NPM: attributes #[[ATTR2]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
+; IS__CGSCC_NPM: attributes #[[ATTR3]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" }
+; IS__CGSCC_NPM: attributes #[[ATTR4:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly }
+; IS__CGSCC_NPM: attributes #[[ATTR5]] = { willreturn writeonly }
+; IS__CGSCC_NPM: attributes #[[ATTR6]] = { nounwind willreturn }
 ;.
diff --git a/llvm/test/Transforms/GlobalOpt/malloc-promote-6.ll b/llvm/test/Transforms/GlobalOpt/malloc-promote-6.ll
new file mode 100644
index 0000000..f35e1ae
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/malloc-promote-6.ll
@@ -0,0 +1,30 @@
+; RUN: opt -passes=globalopt -S < %s | FileCheck %s
+
+; CHECK-NOT: @global
+
+@global = internal global i8* null
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @zot, i8* null }]
+
+declare i8* @_Znwm(i64)
+
+define internal void @widget() {
+  %tmp = tail call i8* @_Znwm(i64 0)
+  %tmp2 = getelementptr inbounds i8, i8* %tmp, i64 0
+  store i8* %tmp, i8** @global, align 8
+  call void @baz(void ()* @spam)
+  ret void
+}
+
+define internal void @spam() {
+  %tmp = load i8*, i8** @global, align 8
+  %tmp2 = getelementptr inbounds i8, i8* %tmp, i64 0
+  ret void
+}
+
+define internal void @zot() {
+  call void @baz(void ()* @widget)
+  ret void
+}
+
+declare void @baz(void ()*)
+
diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
index 950a8ce..4b8d944 100644
--- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
+++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
@@ -3,7 +3,6 @@
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -inferattrs -S | FileCheck --match-full-lines --check-prefixes=CHECK,CHECK-KNOWN,CHECK-NOLINUX,CHECK-OPEN,CHECK-DARWIN %s
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -inferattrs -S | FileCheck --match-full-lines --check-prefixes=CHECK,CHECK-KNOWN,CHECK-LINUX %s
 ; RUN: opt < %s -mtriple=nvptx -inferattrs -S | FileCheck --match-full-lines --check-prefixes=CHECK-NOLINUX,CHECK-NVPTX %s
-; RUN: opt < %s -mtriple=s390x-linux-gnu -inferattrs -S | FileCheck --check-prefixes=CHECK-SYSTEMZ %s
 
 declare i32 @__nvvm_reflect(i8*)
 ; CHECK-NVPTX: declare noundef i32 @__nvvm_reflect(i8* noundef) [[NOFREE_NOUNWIND_READNONE:#[0-9]+]]
@@ -220,7 +219,7 @@ declare x86_fp80 @acoshl(x86_fp80)
 ; CHECK: declare x86_fp80 @acosl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
 declare x86_fp80 @acosl(x86_fp80)
 
-; CHECK: declare noalias noundef i8* @aligned_alloc(i64 allocalign noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]]
+; CHECK: declare noalias noundef i8* @aligned_alloc(i64 allocalign noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE1_FAMILY_MALLOC:#[0-9]+]]
 declare i8* @aligned_alloc(i64, i64)
 
 ; CHECK: declare double @asin(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
@@ -290,7 +289,7 @@ declare void @bcopy(i8*, i8*, i64)
 ; CHECK: declare void @bzero(i8* nocapture writeonly, i64)  [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]]
 declare void @bzero(i8*, i64)
 
-; CHECK: declare noalias noundef i8* @calloc(i64 noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01:#[0-9]+]]
+; CHECK: declare noalias noundef i8* @calloc(i64 noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_MALLOC:#[0-9]+]]
 declare i8* @calloc(i64, i64)
 
 ; CHECK: declare double @cbrt(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
@@ -490,7 +489,7 @@ declare i32 @fputs(i8*, %opaque*)
 ; CHECK: declare noundef i64 @fread(i8* nocapture noundef, i64 noundef, i64 noundef, %opaque* nocapture noundef) [[NOFREE_NOUNWIND]]
 declare i64 @fread(i8*, i64, i64, %opaque*)
 
-; CHECK: declare void @free(i8* nocapture noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN:#[0-9]+]]
+; CHECK: declare void @free(i8* allocptr nocapture noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_MALLOC:#[0-9]+]]
 declare void @free(i8*)
 
 ; CHECK: declare double @frexp(double, i32* nocapture) [[NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]]
@@ -592,11 +591,9 @@ declare i64 @labs(i64)
 declare i32 @lchown(i8*, i32, i32)
 
 ; CHECK: declare double @ldexp(double, i32) [[NOFREE_WILLRETURN:#[0-9]+]]
-; CHECK-SYSTEMZ: declare double @ldexp(double, i32 signext)
 declare double @ldexp(double, i32)
 
 ; CHECK: declare float @ldexpf(float, i32) [[NOFREE_WILLRETURN]]
-; CHECK-SYSTEMZ: declare float @ldexpf(float, i32 signext)
 declare float @ldexpf(float, i32)
 
 ; CHECK: declare x86_fp80 @ldexpl(x86_fp80, i32) [[NOFREE_WILLRETURN]]
@@ -656,7 +653,7 @@ declare i32 @lstat(i8*, %opaque*)
 ; CHECK-LINUX: declare noundef i32 @lstat64(i8* nocapture noundef readonly, %opaque* nocapture noundef) [[NOFREE_NOUNWIND]]
 declare i32 @lstat64(i8*, %opaque*)
 
-; CHECK: declare noalias noundef i8* @malloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE:#[0-9]+]]
+; CHECK: declare noalias noundef i8* @malloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_MALLOC:#[0-9]+]]
 declare i8* @malloc(i64)
 
 ; CHECK-LINUX: declare noalias noundef i8* @memalign(i64 allocalign, i64) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]]
@@ -756,12 +753,10 @@ declare i32 @printf(i8*, ...)
 declare i32 @putc(i32, %opaque*)
 
 ; CHECK: declare noundef i32 @putchar(i32 noundef) [[NOFREE_NOUNWIND]]
-; CHECK-SYSTEMZ: declare noundef i32 @putchar(i32 noundef signext)
 declare i32 @putchar(i32)
 
 ; CHECK-KNOWN: declare noundef i32 @putchar_unlocked(i32 noundef) [[NOFREE_NOUNWIND]]
 ; CHECK-UNKNOWN: declare i32 @putchar_unlocked(i32){{$}}
-; CHECK-SYSTEMZ: declare noundef i32 @putchar_unlocked(i32 noundef signext)
 declare i32 @putchar_unlocked(i32)
 
 ; CHECK: declare noundef i32 @puts(i8* nocapture noundef readonly) [[NOFREE_NOUNWIND]]
@@ -779,10 +774,10 @@ declare i64 @read(i32, i8*, i64)
 ; CHECK: declare noundef i64 @readlink(i8* nocapture noundef readonly, i8* nocapture noundef, i64 noundef) [[NOFREE_NOUNWIND]]
 declare i64 @readlink(i8*, i8*, i64)
 
-; CHECK: declare noalias noundef i8* @realloc(i8* nocapture, i64 noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE:#[0-9]+]]
+; CHECK: declare noalias noundef i8* @realloc(i8* allocptr nocapture, i64 noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_MALLOC:#[0-9]+]]
 declare i8* @realloc(i8*, i64)
 
-; CHECK: declare noalias noundef i8* @reallocf(i8* nocapture, i64 noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE]]
+; CHECK: declare noalias noundef i8* @reallocf(i8* allocptr nocapture, i64 noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_MALLOC]]
 declare i8* @reallocf(i8*, i64)
 
 ; CHECK: declare noundef i8* @realpath(i8* nocapture noundef readonly, i8* noundef) [[NOFREE_NOUNWIND]]
@@ -905,7 +900,7 @@ declare i8* @strcpy(i8*, i8*)
 ; CHECK: declare i64 @strcspn(i8* nocapture, i8* nocapture) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY]]
 declare i64 @strcspn(i8*, i8*)
 
-; CHECK: declare noalias i8* @strdup(i8* nocapture readonly) [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]]
+; CHECK: declare noalias i8* @strdup(i8* nocapture readonly) [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC:#[0-9]+]]
 declare i8* @strdup(i8*)
 
 ; CHECK: declare i64 @strlen(i8* nocapture) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY]]
@@ -923,7 +918,7 @@ declare i32 @strncmp(i8*, i8*, i64)
 ; CHECK: declare i8* @strncpy(i8* noalias returned writeonly, i8* noalias nocapture readonly, i64) [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]]
 declare i8* @strncpy(i8*, i8*, i64)
 
-; CHECK: declare noalias i8* @strndup(i8* nocapture readonly, i64 noundef) [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN]]
+; CHECK: declare noalias i8* @strndup(i8* nocapture readonly, i64 noundef) [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC]]
 declare i8* @strndup(i8*, i64)
 
 ; CHECK: declare i64 @strnlen(i8* nocapture, i64) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN:#[0-9]+]]
@@ -1031,7 +1026,7 @@ declare i32 @utime(i8*, %opaque*)
 ; CHECK: declare noundef i32 @utimes(i8* nocapture noundef readonly, %opaque* nocapture noundef readonly) [[NOFREE_NOUNWIND]]
 declare i32 @utimes(i8*, %opaque*)
 
-; CHECK: declare noalias noundef i8* @valloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE]]
+; CHECK: declare noalias noundef i8* @valloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_MALLOC]]
 declare i8* @valloc(i64)
 
 ; CHECK: declare noundef i32 @vfprintf(%opaque* nocapture noundef, i8* nocapture noundef readonly, %opaque* noundef) [[NOFREE_NOUNWIND]]
@@ -1070,16 +1065,18 @@ declare void @memset_pattern16(i8*, i8*, i64)
 ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { mustprogress nofree nounwind willreturn }
 ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] = { mustprogress nofree nounwind willreturn writeonly }
 ; CHECK-DAG: attributes [[NOFREE_NOUNWIND]] = { nofree nounwind }
-; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0) }
-; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0,1) }
+; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE1_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(1) "alloc-family"="malloc" }
+; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0,1) "alloc-family"="malloc" }
 ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { mustprogress nofree nounwind readonly willreturn }
 ; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] = { argmemonly mustprogress nofree nounwind willreturn }
 ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY]] = { nofree nounwind readonly }
-; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn }
-; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allocsize(1) }
+; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn "alloc-family"="malloc" }
 ; CHECK-DAG: attributes [[NOFREE_WILLRETURN]] = { mustprogress nofree willreturn }
+; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0) "alloc-family"="malloc" }
 ; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { argmemonly mustprogress nofree nounwind readonly willreturn }
 ; CHECK-DAG: attributes [[NOFREE]] = { nofree }
-; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN]]  = { inaccessiblemem_or_argmemonly mustprogress nofree nounwind willreturn }
 ; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND]] = { argmemonly nofree nounwind }
+; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allocsize(1) "alloc-family"="malloc" }
+; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nofree nounwind willreturn "alloc-family"="malloc" }
+
 ; CHECK-NVPTX-DAG: attributes [[NOFREE_NOUNWIND_READNONE]] = { nofree nosync nounwind readnone }
diff --git a/llvm/test/Transforms/InstCombine/SystemZ/libcall-arg-exts.ll b/llvm/test/Transforms/InstCombine/SystemZ/libcall-arg-exts.ll
new file mode 100644
index 0000000..dbd9204
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/SystemZ/libcall-arg-exts.ll
@@ -0,0 +1,98 @@
+; RUN: opt < %s -passes=instcombine -S -mtriple=systemz-unknown | FileCheck %s
+;
+; Check that i32 arguments to generated libcalls have the proper extension
+; attributes.
+
+
+declare double @exp2(double)
+declare float @exp2f(float)
+declare fp128 @exp2l(fp128)
+
+define double @fun1(i32 %x) {
+; CHECK-LABEL: @fun1
+; CHECK: call double @ldexp
+  %conv = sitofp i32 %x to double
+  %ret = call double @exp2(double %conv)
+  ret double %ret
+}
+
+define float @fun2(i32 %x) {
+; CHECK-LABEL: @fun2
+; CHECK: call float @ldexpf
+  %conv = sitofp i32 %x to float
+  %ret = call float @exp2f(float %conv)
+  ret float %ret
+}
+
+define fp128 @fun3(i8 zeroext %x) {
+; CHECK-LABEL: @fun3
+; CHECK: call fp128 @ldexpl
+  %conv = uitofp i8 %x to fp128
+  %ret = call fp128 @exp2l(fp128 %conv)
+  ret fp128 %ret
+}
+
+@a = common global [60 x i8] zeroinitializer, align 1
+@b = common global [60 x i8] zeroinitializer, align 1
+declare i8* @__memccpy_chk(i8*, i8*, i32, i64, i64)
+define i8* @fun4() {
+; CHECK-LABEL: @fun4
+; CHECK: call i8* @memccpy
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i8* @__memccpy_chk(i8* %dst, i8* %src, i32 0, i64 60, i64 -1)
+  ret i8* %ret
+}
+
+%FILE = type { }
+@A = constant [2 x i8] c"A\00"
+declare i32 @fputs(i8*, %FILE*)
+define void @fun5(%FILE* %fp) {
+; CHECK-LABEL: @fun5
+; CHECK: call i32 @fputc
+  %str = getelementptr [2 x i8], [2 x i8]* @A, i32 0, i32 0
+  call i32 @fputs(i8* %str, %FILE* %fp)
+  ret void
+}
+
+@empty = constant [1 x i8] zeroinitializer
+declare i32 @puts(i8*)
+define void @fun6() {
+; CHECK-LABEL: @fun6
+; CHECK: call i32 @putchar
+  %str = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
+  call i32 @puts(i8* %str)
+  ret void
+}
+
+@.str1 = private constant [2 x i8] c"a\00"
+declare i8* @strstr(i8*, i8*)
+define i8* @fun7(i8* %str) {
+; CHECK-LABEL: @fun7
+; CHECK: call i8* @strchr
+  %pat = getelementptr inbounds [2 x i8], [2 x i8]* @.str1, i32 0, i32 0
+  %ret = call i8* @strstr(i8* %str, i8* %pat)
+  ret i8* %ret
+}
+
+; CHECK: declare i8* @strchr(i8*, i32 signext)
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@chp = global i8* zeroinitializer
+declare i8* @strchr(i8*, i32)
+define void @fun8(i32 %chr) {
+; CHECK-LABEL: @fun8
+; CHECK: call i8* @memchr
+  %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strchr(i8* %src, i32 %chr)
+  store i8* %dst, i8** @chp
+  ret void
+}
+
+; CHECK: declare double @ldexp(double, i32 signext)
+; CHECK: declare float @ldexpf(float, i32 signext)
+; CHECK: declare fp128 @ldexpl(fp128, i32 signext)
+; CHECK: declare i8* @memccpy(i8* noalias writeonly, i8* noalias nocapture readonly, i32 signext, i64)
+; CHECK: declare noundef i32 @fputc(i32 noundef signext, %FILE* nocapture noundef)
+; CHECK: declare noundef i32 @putchar(i32 noundef signext)
+; CHECK: declare i8* @memchr(i8*, i32 signext, i64)
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 8e5d6f2..9fc7909 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -1270,8 +1270,8 @@ define <2 x i8> @ashr_add_sexts(<2 x i1> %x, <2 x i1> %y) {
 
 define i32 @cmp_math_sexts(i32 %x, i32 %y) {
 ; CHECK-LABEL: @cmp_math_sexts(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = zext i1 [[DOTNOT]] to i32
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %gt = icmp ugt i32 %x, %y
@@ -1392,3 +1392,307 @@ define i8 @add_like_or_t2_extrause(i8 %x) {
   %r = add i8 %i1, 42
   ret i8 %r
 }
+
+define i8 @add_and_xor(i8 %x, i8 %y) {
+; CHECK-LABEL: @add_and_xor(
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %xor = xor i8 %x, -1
+  %and = and i8 %xor, %y
+  %add = add i8 %and, %x
+  ret i8 %add
+}
+
+define i8 @add_and_xor_wrong_const(i8 %x, i8 %y) {
+; CHECK-LABEL: @add_and_xor_wrong_const(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[X:%.*]], -2
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[XOR]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %xor = xor i8 %x, -2
+  %and = and i8 %xor, %y
+  %add = add i8 %and, %x
+  ret i8 %add
+}
+
+define i8 @add_and_xor_wrong_op(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @add_and_xor_wrong_op(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[Z:%.*]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[XOR]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[AND]], [[X:%.*]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %xor = xor i8 %z, -1
+  %and = and i8 %xor, %y
+  %add = add i8 %and, %x
+  ret i8 %add
+}
+
+define i8 @add_and_xor_commuted1(i8 %x, i8 %_y) {
+; CHECK-LABEL: @add_and_xor_commuted1(
+; CHECK-NEXT:    [[Y:%.*]] = udiv i8 42, [[_Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[Y]], [[X:%.*]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %y = udiv i8 42, %_y ; thwart complexity-based canonicalization
+  %xor = xor i8 %x, -1
+  %and = and i8 %y, %xor
+  %add = add i8 %and, %x
+  ret i8 %add
+}
+
+define i8 @add_and_xor_commuted2(i8 %_x, i8 %y) {
+; CHECK-LABEL: @add_and_xor_commuted2(
+; CHECK-NEXT:    [[X:%.*]] = udiv i8 42, [[_X:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %x = udiv i8 42, %_x ; thwart complexity-based canonicalization
+  %xor = xor i8 %x, -1
+  %and = and i8 %xor, %y
+  %add = add i8 %x, %and
+  ret i8 %add
+}
+
+define i8 @add_and_xor_commuted3(i8 %_x, i8 %_y) {
+; CHECK-LABEL: @add_and_xor_commuted3(
+; CHECK-NEXT:    [[X:%.*]] = udiv i8 42, [[_X:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = udiv i8 42, [[_Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[X]], [[Y]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %x = udiv i8 42, %_x ; thwart complexity-based canonicalization
+  %y = udiv i8 42, %_y ; thwart complexity-based canonicalization
+  %xor = xor i8 %x, -1
+  %and = and i8 %y, %xor
+  %add = add i8 %x, %and
+  ret i8 %add
+}
+
+define i8 @add_and_xor_extra_use(i8 %x, i8 %y) {
+; CHECK-LABEL: @add_and_xor_extra_use(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    call void @use(i8 [[XOR]])
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[XOR]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[Y]], [[X]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %xor = xor i8 %x, -1
+  call void @use(i8 %xor)
+  %and = and i8 %xor, %y
+  call void @use(i8 %and)
+  %add = add i8 %and, %x
+  ret i8 %add
+}
+
+define i8 @add_xor_and_const(i8 %x) {
+; CHECK-LABEL: @add_xor_and_const(
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X:%.*]], 42
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[AND]], 42
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[XOR]], [[X]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %and = and i8 %x, 42
+  %xor = xor i8 %and, 42
+  %add = add i8 %xor, %x
+  ret i8 %add
+}
+
+define i8 @add_xor_and_const_wrong_const(i8 %x) {
+; CHECK-LABEL: @add_xor_and_const_wrong_const(
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X:%.*]], 42
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[AND]], 88
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[XOR]], [[X]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %and = and i8 %x, 42
+  %xor = xor i8 %and, 88
+  %add = add i8 %xor, %x
+  ret i8 %add
+}
+
+define i8 @add_xor_and_var(i8 %x, i8 %y) {
+; CHECK-LABEL: @add_xor_and_var(
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[AND]], [[Y]]
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[XOR]], [[X]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %and = and i8 %x, %y
+  call void @use(i8 %and)
+  %xor = xor i8 %and, %y
+  %add = add i8 %xor, %x
+  ret i8 %add
+}
+
+define i8 @add_xor_and_var_wrong_op1(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @add_xor_and_var_wrong_op1(
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[AND]], [[Z:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[XOR]], [[X]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %and = and i8 %x, %y
+  call void @use(i8 %and)
+  %xor = xor i8 %and, %z
+  %add = add i8 %xor, %x
+  ret i8 %add
+}
+
+define i8 @add_xor_and_var_wrong_op2(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @add_xor_and_var_wrong_op2(
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[AND]], [[Y]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[XOR]], [[Z:%.*]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %and = and i8 %x, %y
+  call void @use(i8 %and)
+  %xor = xor i8 %and, %y
+  %add = add i8 %xor, %z
+  ret i8 %add
+}
+
+define i8 @add_xor_and_var_commuted1(i8 %x, i8 %y) {
+; CHECK-LABEL: @add_xor_and_var_commuted1(
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[AND]], [[Y]]
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[XOR]], [[X]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %and = and i8 %y, %x
+  call void @use(i8 %and)
+  %xor = xor i8 %and, %y
+  %add = add i8 %xor, %x
+  ret i8 %add
+}
+
+define i8 @add_xor_and_var_commuted2(i8 %x, i8 %_y) {
+; CHECK-LABEL: @add_xor_and_var_commuted2(
+; CHECK-NEXT:    [[Y:%.*]] = udiv i8 42, [[_Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[Y]], [[X:%.*]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[Y]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[XOR]], [[X]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %y = udiv i8 42, %_y ; thwart complexity-based canonicalization
+  %and = and i8 %x, %y
+  call void @use(i8 %and)
+  %xor = xor i8 %y, %and
+  %add = add i8 %xor, %x
+  ret i8 %add
+}
+
+define i8 @add_xor_and_var_commuted3(i8 %x, i8 %_y) {
+; CHECK-LABEL: @add_xor_and_var_commuted3(
+; CHECK-NEXT:    [[Y:%.*]] = udiv i8 42, [[_Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[Y]], [[X:%.*]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[Y]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[XOR]], [[X]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %y = udiv i8 42, %_y ; thwart complexity-based canonicalization
+  %and = and i8 %y, %x
+  call void @use(i8 %and)
+  %xor = xor i8 %y, %and
+  %add = add i8 %xor, %x
+  ret i8 %add
+}
+
+define i8 @add_xor_and_var_commuted4(i8 %_x, i8 %y) {
+; CHECK-LABEL: @add_xor_and_var_commuted4(
+; CHECK-NEXT:    [[X:%.*]] = udiv i8 42, [[_X:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[AND]], [[Y]]
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[X]], [[XOR]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %x = udiv i8 42, %_x ; thwart complexity-based canonicalization
+  %and = and i8 %x, %y
+  call void @use(i8 %and)
+  %xor = xor i8 %and, %y
+  %add = add i8 %x, %xor
+  ret i8 %add
+}
+
+define i8 @add_xor_and_var_commuted5(i8 %_x, i8 %y) {
+; CHECK-LABEL: @add_xor_and_var_commuted5(
+; CHECK-NEXT:    [[X:%.*]] = udiv i8 42, [[_X:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[AND]], [[Y]]
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[X]], [[XOR]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %x = udiv i8 42, %_x ; thwart complexity-based canonicalization
+  %and = and i8 %y, %x
+  call void @use(i8 %and)
+  %xor = xor i8 %and, %y
+  %add = add i8 %x, %xor
+  ret i8 %add
+}
+
+define i8 @add_xor_and_var_commuted6(i8 %_x, i8 %_y) {
+; CHECK-LABEL: @add_xor_and_var_commuted6(
+; CHECK-NEXT:    [[X:%.*]] = udiv i8 42, [[_X:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = udiv i8 42, [[_Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[Y]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[Y]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[X]], [[XOR]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %x = udiv i8 42, %_x ; thwart complexity-based canonicalization
+  %y = udiv i8 42, %_y ; thwart complexity-based canonicalization
+  %and = and i8 %x, %y
+  call void @use(i8 %and)
+  %xor = xor i8 %y, %and
+  %add = add i8 %x, %xor
+  ret i8 %add
+}
+
+define i8 @add_xor_and_var_commuted7(i8 %_x, i8 %_y) {
+; CHECK-LABEL: @add_xor_and_var_commuted7(
+; CHECK-NEXT:    [[X:%.*]] = udiv i8 42, [[_X:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = udiv i8 42, [[_Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[Y]], [[X]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[Y]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[X]], [[XOR]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %x = udiv i8 42, %_x ; thwart complexity-based canonicalization
+  %y = udiv i8 42, %_y ; thwart complexity-based canonicalization
+  %and = and i8 %y, %x
+  call void @use(i8 %and)
+  %xor = xor i8 %y, %and
+  %add = add i8 %x, %xor
+  ret i8 %add
+}
+
+define i8 @add_xor_and_var_extra_use(i8 %x, i8 %y) {
+; CHECK-LABEL: @add_xor_and_var_extra_use(
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[AND]], [[Y]]
+; CHECK-NEXT:    call void @use(i8 [[XOR]])
+; CHECK-NEXT:    [[ADD:%.*]] = or i8 [[XOR]], [[X]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %and = and i8 %x, %y
+  call void @use(i8 %and)
+  %xor = xor i8 %and, %y
+  call void @use(i8 %xor)
+  %add = add i8 %xor, %x
+  ret i8 %add
+}
diff --git a/llvm/test/Transforms/InstCombine/atomic.ll b/llvm/test/Transforms/InstCombine/atomic.ll
index ec79891..e3afffe 100644
--- a/llvm/test/Transforms/InstCombine/atomic.ll
+++ b/llvm/test/Transforms/InstCombine/atomic.ll
@@ -426,7 +426,6 @@ define void @no_atomic_vector_store(<2 x float> %p, i8* %p2) {
 
 define i32 @atomic_load_from_constant_global() {
 ; CHECK-LABEL: @atomic_load_from_constant_global(
-; CHECK-NEXT:    [[V:%.*]] = load atomic i32, i32* @c seq_cst, align 4
 ; CHECK-NEXT:    ret i32 42
 ;
   %v = load atomic i32, i32* @c seq_cst, align 4
@@ -435,7 +434,6 @@ define i32 @atomic_load_from_constant_global() {
 
 define i8 @atomic_load_from_constant_global_bitcast() {
 ; CHECK-LABEL: @atomic_load_from_constant_global_bitcast(
-; CHECK-NEXT:    [[V:%.*]] = load atomic i8, i8* bitcast (i32* @c to i8*) seq_cst, align 1
 ; CHECK-NEXT:    ret i8 42
 ;
   %v = load atomic i8, i8* bitcast (i32* @c to i8*) seq_cst, align 1
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-gep.ll b/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
index 928409a4..ba38615 100644
--- a/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
+++ b/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -126,7 +126,7 @@ declare void @use.ptr(i8*)
 
 define i8* @gep_sub_self() {
 ; CHECK-LABEL: @gep_sub_self(
-; CHECK-NEXT:    ret i8* null
+; CHECK-NEXT:    ret i8* getelementptr (i8, i8* @g, i64 sub (i64 0, i64 ptrtoint (i8* @g to i64)))
 ;
   %p.int = ptrtoint i8* @g to i64
   %p.int.neg = sub i64 0, %p.int
@@ -136,7 +136,7 @@ define i8* @gep_sub_self() {
 
 define i8* @gep_sub_self_plus_addr(i64 %addr) {
 ; CHECK-LABEL: @gep_sub_self_plus_addr(
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, i8* null, i64 [[ADDR:%.*]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, i8* getelementptr (i8, i8* @g, i64 sub (i64 0, i64 ptrtoint (i8* @g to i64))), i64 [[ADDR:%.*]]
 ; CHECK-NEXT:    ret i8* [[P2]]
 ;
   %p.int = ptrtoint i8* @g to i64
@@ -164,7 +164,7 @@ define i8* @gep_plus_addr_sub_self_in_loop() {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[ADDR:%.*]] = call i64 @get.i64()
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, i8* null, i64 [[ADDR]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, i8* getelementptr (i8, i8* @g, i64 sub (i64 0, i64 ptrtoint (i8* @g to i64))), i64 [[ADDR]]
 ; CHECK-NEXT:    call void @use.ptr(i8* [[P2]])
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
@@ -182,7 +182,7 @@ loop:
 
 define i8* @gep_sub_other() {
 ; CHECK-LABEL: @gep_sub_other(
-; CHECK-NEXT:    ret i8* inttoptr (i64 sub (i64 ptrtoint (i8* @g to i64), i64 ptrtoint (i8* @g2 to i64)) to i8*)
+; CHECK-NEXT:    ret i8* getelementptr (i8, i8* @g, i64 sub (i64 0, i64 ptrtoint (i8* @g2 to i64)))
 ;
   %p.int = ptrtoint i8* @g2 to i64
   %p.int.neg = sub i64 0, %p.int
diff --git a/llvm/test/Transforms/InstCombine/double-float-shrink-1.ll b/llvm/test/Transforms/InstCombine/double-float-shrink-1.ll
index 13bf554..85c9a01 100644
--- a/llvm/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/llvm/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -346,7 +346,7 @@ define float @logb_test1(float %f)   {
 ; LINUX-NEXT:    [[LOGBF:%.*]] = call fast float @logbf(float [[F:%.*]])
 ; LINUX-NEXT:    ret float [[LOGBF]]
 ; MS32:          [[POWF:%.*]] = call fast double @logb(double [[F:%.*]])
-; MS64-NEXT:     [[LOGBF:%.*]] = call fast float @logbf(float [[F:%.*]])
+; MS64-NEXT:     [[LOGBF:%.*]] = call fast float @_logbf(float [[F:%.*]])
 ;
   %conv = fpext float %f to double
   %call = call fast double @logb(double %conv)
diff --git a/llvm/test/Transforms/InstCombine/pr39177.ll b/llvm/test/Transforms/InstCombine/pr39177.ll
index c13fc00..0671b5a 100644
--- a/llvm/test/Transforms/InstCombine/pr39177.ll
+++ b/llvm/test/Transforms/InstCombine/pr39177.ll
@@ -1,5 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+;
+; Check that SimplifyLibCalls do not (crash or) emit a library call if user
+; has made a function alias with the same name.
 
 %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
 %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
@@ -28,10 +30,8 @@ entry:
 
 define void @foo() {
 ; CHECK-LABEL: @foo(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @fwrite(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i64 7, i64 1, %struct._IO_FILE* [[TMP0]])
-; CHECK-NEXT:    ret void
+; CHECK-NOT:    call i64 @fwrite(
+; CHECK:        call {{.*}} @fprintf(
 ;
 entry:
   %retval = alloca i32, align 4
diff --git a/llvm/test/Transforms/InstCombine/pr55228.ll b/llvm/test/Transforms/InstCombine/pr55228.ll
new file mode 100644
index 0000000..5ef9f00
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr55228.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target datalayout = "p:8:8"
+
+@g = external global i8
+@c = constant ptr getelementptr inbounds (i8, ptr @g, i64 1)
+
+define i1 @test(ptr %p) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[P:%.*]], getelementptr inbounds (i8, ptr @g, i8 1)
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %alloca = alloca ptr
+  call void @llvm.memcpy.p0.p0.i32(ptr %alloca, ptr @c, i32 0, i1 false)
+  %load = load ptr, ptr %alloca
+  %cmp = icmp eq ptr %p, %load
+  ret i1 %cmp
+}
+
+declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)
diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
index fa686c9..4746587 100644
--- a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
@@ -242,5 +242,18 @@ define i4 @strlen(i8* %s) {
   ret i4 0
 }
 
+; Test emission of stpncpy.
+@a = dso_local global [4 x i8] c"123\00"
+@b = dso_local global [5 x i8] zeroinitializer
+declare i8* @__stpncpy_chk(i8* noundef, i8* noundef, i32 noundef, i32 noundef)
+define signext i32 @emit_stpncpy() {
+; CHECK-LABEL: @emit_stpncpy(
+; CHECK-NEXT: call i8* @stpncpy({{.*}} @b, {{.*}} @a, {{.*}} i32 2)
+  %call = call i8* @__stpncpy_chk(i8* noundef getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0),
+                                  i8* noundef getelementptr inbounds ([4 x i8], [4 x i8]* @a, i32 0, i32 0),
+                                  i32 noundef 2, i32 noundef 5)
+  ret i32 0
+}
+
 attributes #0 = { nobuiltin }
 attributes #1 = { builtin }
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
index 44da8b7..b08c1eb 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
@@ -1,15 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -O3 -mtriple=x86_64-- -mcpu=core2 | FileCheck %s
 
 declare i1 @check() nounwind
 declare i1 @foo(i8*, i8*, i8*) nounwind
 
 ; Check that redundant phi elimination ran
-; CHECK: @test
-; CHECK: %while.body.i
-; CHECK: movs
-; CHECK-NOT: movs
-; CHECK: %for.end.i
 define i32 @test(i8* %base) nounwind uwtable ssp {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset %rbx, -48
+; CHECK-NEXT:    .cfi_offset %r12, -40
+; CHECK-NEXT:    .cfi_offset %r13, -32
+; CHECK-NEXT:    .cfi_offset %r14, -24
+; CHECK-NEXT:    .cfi_offset %r15, -16
+; CHECK-NEXT:    movq %rdi, %r14
+; CHECK-NEXT:    leaq 16(%rdi), %r15
+; CHECK-NEXT:    movl $16, %eax
+; CHECK-NEXT:    xorl %r12d, %r12d
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_1: # %while.body.i
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB0_2 Depth 2
+; CHECK-NEXT:    movslq %r12d, %r13
+; CHECK-NEXT:    movq %rax, %r12
+; CHECK-NEXT:    leaq (%r15,%r13), %rbx
+; CHECK-NEXT:    addq $16, %r13
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_2: # %for.body.i
+; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    callq check@PLT
+; CHECK-NEXT:    incq %rbx
+; CHECK-NEXT:    testb $1, %al
+; CHECK-NEXT:    je .LBB0_2
+; CHECK-NEXT:  # %bb.3: # %for.end.i
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    addq %r14, %r13
+; CHECK-NEXT:    movq %r13, %rdi
+; CHECK-NEXT:    movq %r13, %rsi
+; CHECK-NEXT:    callq foo@PLT
+; CHECK-NEXT:    testb $1, %al
+; CHECK-NEXT:    je .LBB0_5
+; CHECK-NEXT:  # %bb.4: # %cond.true29.i
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    leaq 16(%r12), %rax
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_5: # %cond.false35.i
 entry:
   br label %while.body.lr.ph.i
 
@@ -58,11 +104,20 @@ exit:                                 ; preds = %cond.true29.i, %cond.true.i
 ; Test phi reuse after LSR that requires SCEVExpander to hoist an
 ; interesting GEP.
 ;
-; CHECK: @test2
-; CHECK: %entry
-; CHECK-NOT: mov
-; CHECK: je
 define void @test2(i32 %n) nounwind uwtable {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    je .LBB1_1
+; CHECK-NEXT:  # %bb.3: # %while.end
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB1_1: # %for.cond468.preheader
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB1_2: # %for.inc498
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    jmp .LBB1_2
 entry:
   br i1 undef, label %while.end, label %for.cond468
 
@@ -95,16 +150,36 @@ while.end:                                        ; preds = %entry
 ; Test redundant phi elimination when the deleted phi's increment is
 ; itself a phi.
 ;
-; CHECK: @test3
-; CHECK: %meshBB1
-; CHECK: %meshBB
-; CHECK-NEXT: Parent Loop
-; CHECK-NEXT: Inner Loop
-; CHECK-NEXT: incq
-; CHECK: testb
-; CHECK: je
-; CHECK: jmp
 define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp {
+; CHECK-LABEL: test3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jne .LBB2_6
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB2_2: # %meshBB1
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB2_4 Depth 2
+; CHECK-NEXT:    # implicit-def: $rcx
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jne .LBB2_2
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB2_4: # %meshBB
+; CHECK-NEXT:    # Parent Loop BB2_2 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    incq %rcx
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    je .LBB2_4
+; CHECK-NEXT:    jmp .LBB2_2
+; CHECK-NEXT:  .LBB2_6: # %meshBB5
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    je .LBB2_5
+; CHECK-NEXT:  # %bb.7: # %eval_At_times_u.exit
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB2_5: # %for.inc8.us.i2
 entry:
   br i1 undef, label %meshBB1, label %meshBB5
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
index feafd88..56d8a07 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -320,10 +320,20 @@ for.end:
 
 ; ADD (with reduction stored in invariant address)
 
-; CHECK-REMARK: loop not vectorized: value that could not be identified as reduction is used outside the loop
+; CHECK-REMARK: vectorized loop (vectorization width: vscale x 4, interleaved count: 2)
 define void @invariant_store(i32* %dst, i32* readonly %src) {
 ; CHECK-LABEL: @invariant_store
-; CHECK-NOT: vector.body
+; CHECK: vector.body:
+; CHECK: %[[LOAD1:.*]] = load <vscale x 4 x i32>
+; CHECK: %[[LOAD2:.*]] = load <vscale x 4 x i32>
+; CHECK: %[[ADD1:.*]] = add <vscale x 4 x i32> %{{.*}}, %[[LOAD1]]
+; CHECK: %[[ADD2:.*]] = add <vscale x 4 x i32> %{{.*}}, %[[LOAD2]]
+; CHECK: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %[[ADD1]]
+; CHECK: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %[[ADD2]]
+; CHECK: middle.block:
+; CHECK: %[[ADD:.*]] = add <vscale x 4 x i32> %[[ADD2]], %[[ADD1]]
+; CHECK-NEXT: %[[SUM:.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %[[ADD]])
+; CHECK-NEXT: store i32 %[[SUM]], i32* %gep.dst, align 4
 entry:
   %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
   store i32 0, i32* %gep.dst, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
index a0dba42..6212d82 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -1139,10 +1139,10 @@ define float @fmuladd_scalar_vf(float* %a, float* %b, i64 %n) {
 ; CHECK-UNORDERED: [[LOAD5:%.*]] = load float, float*
 ; CHECK-UNORDERED: [[LOAD6:%.*]] = load float, float*
 ; CHECK-UNORDERED: [[LOAD7:%.*]] = load float, float*
-; CHECK-UNORDERED: [[FMULADD]] = call float @llvm.fmuladd.f32(float [[LOAD]], float [[LOAD4]], float [[VEC_PHI]])
-; CHECK-UNORDERED: [[FMULADD1]] = call float @llvm.fmuladd.f32(float [[LOAD1]], float [[LOAD5]], float [[VEC_PHI1]])
-; CHECK-UNORDERED: [[FMULADD2]] = call float @llvm.fmuladd.f32(float [[LOAD2]], float [[LOAD6]], float [[VEC_PHI2]])
-; CHECK-UNORDERED: [[FMULADD3]] = call float @llvm.fmuladd.f32(float [[LOAD3]], float [[LOAD7]], float [[VEC_PHI3]])
+; CHECK-UNORDERED: [[FMULADD]] = tail call float @llvm.fmuladd.f32(float [[LOAD]], float [[LOAD4]], float [[VEC_PHI]])
+; CHECK-UNORDERED: [[FMULADD1]] = tail call float @llvm.fmuladd.f32(float [[LOAD1]], float [[LOAD5]], float [[VEC_PHI1]])
+; CHECK-UNORDERED: [[FMULADD2]] = tail call float @llvm.fmuladd.f32(float [[LOAD2]], float [[LOAD6]], float [[VEC_PHI2]])
+; CHECK-UNORDERED: [[FMULADD3]] = tail call float @llvm.fmuladd.f32(float [[LOAD3]], float [[LOAD7]], float [[VEC_PHI3]])
 ; CHECK-UNORDERED-NOT: llvm.vector.reduce.fadd
 ; CHECK-UNORDERED: middle.block:
 ; CHECK-UNORDERED: [[BIN_RDX:%.*]] = fadd float [[FMULADD1]], [[FMULADD]]
diff --git a/llvm/test/Transforms/LoopVectorize/fpsat.ll b/llvm/test/Transforms/LoopVectorize/fpsat.ll
new file mode 100644
index 0000000..7e3f743
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/fpsat.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+
+define void @signed(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: @signed(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[Y:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[Y]], [[UGLYGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[X]], [[UGLYGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[TMP8]])
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[TMP9]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+;
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %1 = tail call i32 @llvm.fptosi.sat.i32.f32(float %0)
+  %arrayidx2 = getelementptr inbounds i32, ptr %y, i64 %indvars.iv
+  store i32 %1, ptr %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @unsigned(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: @unsigned(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[Y:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[Y]], [[UGLYGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[X]], [[UGLYGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope !8
+; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4, !alias.scope !11, !noalias !8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[TMP8]])
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[TMP9]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+;
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %1 = tail call i32 @llvm.fptoui.sat.i32.f32(float %0)
+  %arrayidx2 = getelementptr inbounds i32, ptr %y, i64 %indvars.iv
+  store i32 %1, ptr %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare i32 @llvm.fptosi.sat.i32.f32(float)
+declare i32 @llvm.fptoui.sat.i32.f32(float)
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-with-call.ll b/llvm/test/Transforms/LoopVectorize/interleave-with-call.ll
index e92400e..5cca6a6 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-with-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-with-call.ll
@@ -12,7 +12,7 @@
 ; CHECK-NEXT:   vector.body:
 ; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:     vp<[[IV_STEPS:%.]]>    = SCALAR-STEPS vp<[[CAN_IV]]>, ir<%start>, ir<1>
-; CHECK-NEXT:     WIDEN-CALL ir<%min> = call @llvm.smin.i32(vp<[[IV_STEPS]]>, ir<65535>)
+; CHECK-NEXT:     CLONE ir<%min> = call vp<[[IV_STEPS]]>, ir<65535>, ir<@llvm.smin.i32>
 ; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr ir<%dst>, vp<[[IV_STEPS]]>
 ; CHECK-NEXT:     CLONE store ir<%min>, ir<%arrayidx>
 ; CHECK-NEXT:     EMIT vp<[[INC:%.+]]> = VF * UF +(nuw)  vp<[[CAN_IV]]>
@@ -27,8 +27,8 @@ define void @test(i32 %start, ptr %dst) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i32 %start, [[INDEX]]
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[INDUCTION1:%.*]] = add i32 [[OFFSET_IDX]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[INDUCTION]], i32 65535)
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[INDUCTION1]], i32 65535)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.smin.i32(i32 [[INDUCTION]], i32 65535)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[INDUCTION1]], i32 65535)
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDUCTION]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDUCTION1]]
 ; CHECK-NEXT:    store i32 [[TMP1]], ptr [[TMP3]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
index d002b1b..862a384 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -11,7 +11,23 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;   dst[42] = sum;
 ; }
 ; CHECK-LABEL: @reduc_store
-; CHECK-NOT: vector.body
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP4]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT:    store i32 [[TMP6]], i32* [[GEP_DST:%.*]], align 4
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1000, 1000
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
 define void @reduc_store(i32* %dst, i32* readonly %src) {
 entry:
   %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
@@ -41,7 +57,14 @@ exit:
 ;   dst[42] = sum;
 ; }
 ; CHECK-LABEL: @reduc_store_fadd_fast
-; CHECK-NOT: vector.body
+; CHECK: vector.body:
+; CHECK: phi <4 x float>
+; CHECK: load <4 x float>
+; CHECK: fadd fast <4 x float>
+; CHECK-NOT: store float %{{[0-9]+}}, float* %gep.dst
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32
+; CHECK-NEXT: store float %{{[0-9]+}}, float* %gep.dst
 define void @reduc_store_fadd_fast(float* %dst, float* readonly %src) {
 entry:
   %gep.dst = getelementptr inbounds float, float* %dst, i64 42
@@ -152,7 +175,55 @@ for.end:
 ;    dst[42] = sum;
 ;  }
 ; CHECK-LABEL: @reduc_store_inside_unrolled
-; CHECK-NOT: vector.body
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4, !alias.scope !11
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 4, !alias.scope !11
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP6]], align 4, !alias.scope !11
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP7]], align 4, !alias.scope !11
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 2
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP11]], i32 3
+; CHECK-NEXT:    [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP17:%.*]] = or <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i64> [[TMP17]], i32 0
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP18]]
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP20]]
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x i64> [[TMP17]], i32 2
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP24]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP19]], align 4, !alias.scope !11
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, i32* [[TMP21]], align 4, !alias.scope !11
+; CHECK-NEXT:    [[TMP28:%.*]] = load i32, i32* [[TMP23]], align 4, !alias.scope !11
+; CHECK-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP25]], align 4, !alias.scope !11
+; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
+; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP27]], i32 1
+; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP28]], i32 2
+; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 3
+; CHECK-NEXT:    [[TMP34]] = add <4 x i32> [[TMP33]], [[TMP16]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT:    [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; CHECK-NEXT:    br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP36:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP34]])
+; CHECK-NEXT:    store i32 [[TMP36]], i32* [[GEP_DST:%.*]], align 4
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 500, 500
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
 define void @reduc_store_inside_unrolled(i32* %dst, i32* readonly %src) {
 entry:
   %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
@@ -160,7 +231,7 @@ entry:
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %sum.1, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.2, %for.body ]
   %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
   %0 = load i32, i32* %gep.src, align 4
   %sum.1 = add nsw i32 %0, %sum
@@ -178,6 +249,38 @@ exit:
   ret void
 }
 
+; Check that we cannot vectorize code if stored value is not the final reduction
+; value
+;
+;  int sum = 0;
+;  for(int i=0; i < 1000; i++) {
+;    sum += src[i];
+;    dst[42] = sum + 1;
+;  }
+; CHECK-LABEL: @reduc_store_not_final_value
+; CHECK-NOT: vector.body:
+define void @reduc_store_not_final_value(i32* %dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  store i32 0, i32* %gep.dst, align 4
+  br label %for.body
+
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %gep.src, align 4
+  %add = add nsw i32 %sum, %0
+  %sum_plus_one = add i32 %add, 1
+  store i32 %sum_plus_one, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
 ; We cannot vectorize if two (or more) invariant stores exist in a loop.
 ;
 ;  int sum = 0;
@@ -197,7 +300,7 @@ entry:
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %sum.1, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.2, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
   %0 = load i32, i32* %arrayidx, align 4
   %sum.1 = add nsw i32 %0, %sum
@@ -224,7 +327,12 @@ exit:
 ;    dst[42] = sum;
 ;  }
 ; CHECK-LABEL: @reduc_store_middle_store_predicated
-; CHECK-NOT: vector.body
+; CHECK: vector.body:
+; CHECK-NOT: store i32 %{{[0-9]+}}, i32* %gep.dst
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.vector.reduce.add.v4i32
+; CHECK-NEXT: store i32 [[TMP]], i32* %gep.dst
+; CHECK: ret void
 define void @reduc_store_middle_store_predicated(i32* %dst, i32* readonly %src) {
 entry:
   %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
@@ -299,6 +407,36 @@ exit:                                 ; preds = %latch
   ret void
 }
 
+; Final reduction value is overwritten inside loop
+;
+; for(int i=0; i < 1000; i++) {
+;   sum += src[i];
+;   dst[42] = sum;
+;   dst[42] = 0;
+; }
+; CHECK-LABEL: @reduc_store_final_store_overwritten
+; CHECK-NOT: vector.body:
+define void @reduc_store_final_store_overwritten(i32* %dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  br label %for.body
+
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %gep.src, align 4
+  %add = add nsw i32 %sum, %0
+  store i32 %add, i32* %gep.dst, align 4
+  store i32 0, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
 ; Final value used outside of loop does not prevent vectorization
 ;
 ; int sum = 0;
@@ -308,7 +446,16 @@ exit:                                 ; preds = %latch
 ; }
 ; dst[43] = sum;
 ; CHECK-LABEL: @reduc_store_inoutside
-; CHECK-NOT: vector.body
+; CHECK: vector.body:
+; CHECK-NOT: store i32 %{{[0-9]+}}, i32* %gep.src
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.vector.reduce.add.v4i32
+; CHECK-NEXT: store i32 [[TMP]], i32* %gep.dst
+; CHECK: exit:
+; CHECK: [[PHI:%.*]] = phi i32 [ [[TMP1:%.*]], %for.body ], [ [[TMP2:%.*]], %middle.block ]
+; CHECK: [[ADDR:%.*]] = getelementptr inbounds i32, i32* %dst, i64 43
+; CHECK: store i32 [[PHI]], i32* [[ADDR]]
+; CHECK: ret void
 define void @reduc_store_inoutside(i32* %dst, i32* readonly %src) {
 entry:
   %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 2582791..81593f9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -146,6 +146,50 @@ for.end:                                          ; preds = %for.body, %entry
   ret float %red.next
 }
 
+define void @print_reduction_with_invariant_store(i64 %n, float* noalias %y, float* noalias %dst) {
+; CHECK-LABEL: Checking a loop in 'print_reduction_with_invariant_store'
+; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
+; CHECK-NEXT:   WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next>
+; CHECK-NEXT:   vp<[[IV:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1>
+; CHECK-NEXT:   CLONE ir<%arrayidx> = getelementptr ir<%y>, vp<[[IV]]>
+; CHECK-NEXT:   WIDEN ir<%lv> = load ir<%arrayidx>
+; CHECK-NEXT:   REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop)
+; CHECK-NEXT:   EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF +(nuw) vp<[[CAN_IV]]>
+; CHECK-NEXT:   EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+  %red = phi float [ %red.next, %for.body ], [ 0.0, %entry ]
+  %arrayidx = getelementptr inbounds float, float* %y, i64 %iv
+  %lv = load float, float* %arrayidx, align 4
+  %red.next = fadd fast float %lv, %red
+  store float %red.next, float* %dst, align 4
+  %iv.next = add i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
 define void @print_replicate_predicated_phi(i64 %n, i64* %x) {
 ; CHECK-LABEL: Checking a loop in 'print_replicate_predicated_phi'
 ; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
index 76e0f39..00ac126 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
@@ -90,23 +90,16 @@ return:
 define float @test_merge_anyof_v4sf(<4 x float> %t) {
 ; CHECK-LABEL: @test_merge_anyof_v4sf(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[T_FR:%.*]] = freeze <4 x float> [[T:%.*]]
-; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i4 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[T_FR]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
-; CHECK-NEXT:    [[OR_COND3:%.*]] = or i1 [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
-; CHECK-NEXT:    [[OR_COND4:%.*]] = or i1 [[OR_COND3]], [[TMP5]]
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
-; CHECK-NEXT:    [[OR_COND5:%.*]] = or i1 [[OR_COND4]], [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
-; CHECK-NEXT:    [[OR_COND6:%.*]] = or i1 [[OR_COND5]], [[TMP7]]
-; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[SHIFT]], [[T_FR]]
-; CHECK-NEXT:    [[ADD:%.*]] = extractelement <4 x float> [[TMP8]], i64 0
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]]
+; CHECK-NEXT:    [[T_FR7:%.*]] = freeze <4 x float> [[T:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR7]], zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt <4 x float> [[T_FR7]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i1> [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
+; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i4 [[TMP3]], 0
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR7]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x float> [[SHIFT]], [[T_FR7]]
+; CHECK-NEXT:    [[ADD:%.*]] = extractelement <4 x float> [[TMP4]], i64 0
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[ADD]], float 0.000000e+00
 ; CHECK-NEXT:    ret float [[RETVAL_0]]
 ;
 entry:
@@ -419,24 +412,16 @@ return:
 define float @test_merge_anyof_v4si(<4 x i32> %t) {
 ; CHECK-LABEL: @test_merge_anyof_v4si(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i4 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR]], <i32 255, i32 255, i32 255, i32 255>
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
-; CHECK-NEXT:    [[OR_COND3:%.*]] = or i1 [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
-; CHECK-NEXT:    [[OR_COND4:%.*]] = or i1 [[OR_COND3]], [[TMP5]]
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
-; CHECK-NEXT:    [[OR_COND5:%.*]] = or i1 [[OR_COND4]], [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
-; CHECK-NEXT:    [[OR_COND6:%.*]] = or i1 [[OR_COND5]], [[TMP7]]
-; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR]]
-; CHECK-NEXT:    [[ADD:%.*]] = extractelement <4 x i32> [[TMP8]], i64 0
+; CHECK-NEXT:    [[T_FR7:%.*]] = freeze <4 x i32> [[T:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add <4 x i32> [[T_FR7]], <i32 -256, i32 -256, i32 -256, i32 -256>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i32> [[TMP0]], <i32 -255, i32 -255, i32 -255, i32 -255>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i1> [[TMP1]] to i4
+; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i4 [[TMP2]], 0
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR7]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR7]]
+; CHECK-NEXT:    [[ADD:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[ADD]] to float
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[CONV]]
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[CONV]], float 0.000000e+00
 ; CHECK-NEXT:    ret float [[RETVAL_0]]
 ;
 entry:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index 3dd7578..db4f06b 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -41,7 +41,7 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
 
 define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 4, i32 2, i32 5, i32 6>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll
new file mode 100644
index 0000000..90eaa71
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -slp-vectorizer -mtriple=aarch64-none-eabi < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @signed(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: @signed(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l0 = load float, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+  %l2 = load float, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+  %l4 = load float, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+  %l6 = load float, ptr %arrayidx.3, align 4
+  %l1 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l0)
+  %l3 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l2)
+  %l5 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l4)
+  %l7 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l6)
+  store i32 %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1
+  store i32 %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2
+  store i32 %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3
+  store i32 %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+define void @unsigned(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: @unsigned(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]])
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l0 = load float, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+  %l2 = load float, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+  %l4 = load float, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+  %l6 = load float, ptr %arrayidx.3, align 4
+  %l1 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l0)
+  %l3 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l2)
+  %l5 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l4)
+  %l7 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l6)
+  store i32 %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1
+  store i32 %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2
+  store i32 %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3
+  store i32 %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+declare i32 @llvm.fptosi.sat.i32.f32(float)
+declare i32 @llvm.fptoui.sat.i32.f32(float)
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
index 01d743f..4142e31 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
@@ -15,10 +15,10 @@ define void @PR28330(i32 %n) {
 ; DEFAULT-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; DEFAULT-NEXT:    br label [[FOR_BODY:%.*]]
 ; DEFAULT:       for.body:
-; DEFAULT-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; DEFAULT-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; DEFAULT-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
 ; DEFAULT-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
-; DEFAULT-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
+; DEFAULT-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], [[P17]]
 ; DEFAULT-NEXT:    br label [[FOR_BODY]]
 ;
 ; GATHER-LABEL: @PR28330(
@@ -27,10 +27,10 @@ define void @PR28330(i32 %n) {
 ; GATHER-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
 ; GATHER:       for.body:
-; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; GATHER-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
 ; GATHER-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
-; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
+; GATHER-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], [[P17]]
 ; GATHER-NEXT:    br label [[FOR_BODY]]
 ;
 ; MAX-COST-LABEL: @PR28330(
@@ -39,10 +39,10 @@ define void @PR28330(i32 %n) {
 ; MAX-COST-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; MAX-COST-NEXT:    br label [[FOR_BODY:%.*]]
 ; MAX-COST:       for.body:
-; MAX-COST-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; MAX-COST-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; MAX-COST-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
 ; MAX-COST-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
-; MAX-COST-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
+; MAX-COST-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], [[P17]]
 ; MAX-COST-NEXT:    br label [[FOR_BODY]]
 ;
 entry:
@@ -92,10 +92,10 @@ define void @PR32038(i32 %n) {
 ; DEFAULT-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; DEFAULT-NEXT:    br label [[FOR_BODY:%.*]]
 ; DEFAULT:       for.body:
-; DEFAULT-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; DEFAULT-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; DEFAULT-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
 ; DEFAULT-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
-; DEFAULT-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], -5
+; DEFAULT-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], -5
 ; DEFAULT-NEXT:    br label [[FOR_BODY]]
 ;
 ; GATHER-LABEL: @PR32038(
@@ -104,10 +104,10 @@ define void @PR32038(i32 %n) {
 ; GATHER-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
 ; GATHER:       for.body:
-; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; GATHER-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
 ; GATHER-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
-; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], -5
+; GATHER-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], -5
 ; GATHER-NEXT:    br label [[FOR_BODY]]
 ;
 ; MAX-COST-LABEL: @PR32038(
@@ -116,10 +116,10 @@ define void @PR32038(i32 %n) {
 ; MAX-COST-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; MAX-COST-NEXT:    br label [[FOR_BODY:%.*]]
 ; MAX-COST:       for.body:
-; MAX-COST-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; MAX-COST-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; MAX-COST-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
 ; MAX-COST-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
-; MAX-COST-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], -5
+; MAX-COST-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], -5
 ; MAX-COST-NEXT:    br label [[FOR_BODY]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
index 24c69fb..12a2cd2 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
@@ -204,10 +204,10 @@ define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) {
 
 define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @reduction_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 7, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 3, i32 6>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 2, i32 7>
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], <i32 15, i32 15, i32 15, i32 15>
 ; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i32> [[TMP6]], <i32 65537, i32 65537, i32 65537, i32 65537>
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
index 962f8ca..03a29f4 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
@@ -204,10 +204,10 @@ define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) {
 
 define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @reduction_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 7, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 3, i32 6>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 2, i32 7>
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], <i32 15, i32 15, i32 15, i32 15>
 ; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i32> [[TMP6]], <i32 65537, i32 65537, i32 65537, i32 65537>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
index 6817269..0d20a0e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
@@ -8,7 +8,7 @@ define void @mainTest(i32* %ptr) #0  {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[PTR:%.*]], null
 ; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP:%.*]], label [[BAIL_OUT:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[OP_RDX3:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -17,10 +17,10 @@ define void @mainTest(i32* %ptr) #0  {
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP3]] to i64
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP7]], 1
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = add i32 [[OP_EXTRA]], [[TMP4]]
-; CHECK-NEXT:    [[OP_EXTRA2:%.*]] = add i32 [[OP_EXTRA1]], [[TMP3]]
-; CHECK-NEXT:    [[OP_EXTRA3]] = add i32 [[OP_EXTRA2]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 [[TMP7]], [[TMP4]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], [[TMP3]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = add i32 [[OP_RDX1]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX3]] = add i32 [[OP_RDX2]], 1
 ; CHECK-NEXT:    br label [[LOOP]]
 ; CHECK:       bail_out:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
index d15494e..5455914 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
@@ -7,20 +7,20 @@ define void @test() #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_RDX1:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], <i64 3, i64 2, i64 1, i64 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], <i64 2, i64 3, i64 1, i64 0>
 ; CHECK-NEXT:    [[TMP3]] = extractelement <4 x i64> [[TMP2]], i32 3
 ; CHECK-NEXT:    [[DUMMY_ADD:%.*]] = add i16 0, 0
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1
 ; CHECK-NEXT:    [[DUMMY_SHL:%.*]] = shl i64 [[TMP4]], 32
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP2]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], <i64 32, i64 32, i64 32, i64 32>
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i64 [[TMP7]], 0
-; CHECK-NEXT:    [[OP_EXTRA1]] = add i64 [[OP_EXTRA]], [[TMP3]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = add i64 [[TMP7]], [[TMP3]]
+; CHECK-NEXT:    [[OP_RDX1]] = add i64 [[OP_RDX]], 0
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
index 44f1fb1..318d452 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
@@ -5,98 +5,98 @@
 define void @Test(i32) {
 ; CHECK-LABEL: @Test(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP0]], i32 4
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP0]], i32 5
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP0]], i32 6
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP0]], i32 7
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[TMP0]], i32 4
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[TMP0]], i32 5
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[TMP0]], i32 6
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[TMP0]], i32 7
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <16 x i32> [[TMP16]], i32 [[TMP0]], i32 8
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <16 x i32> [[TMP17]], i32 [[TMP0]], i32 9
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <16 x i32> [[TMP18]], i32 [[TMP0]], i32 10
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <16 x i32> [[TMP19]], i32 [[TMP0]], i32 11
+; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <16 x i32> [[TMP20]], i32 [[TMP0]], i32 12
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[TMP0]], i32 13
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[TMP0]], i32 14
+; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[TMP0]], i32 15
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP10:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0:%.*]]
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA2:%.*]] = and i32 [[OP_EXTRA1]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA2]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
-; CHECK-NEXT:    [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[OP_EXTRA26]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP2]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = and <2 x i32> [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = add <2 x i32> [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP10]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP25:%.*]] = phi <2 x i32> [ [[TMP36:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP25]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
+; CHECK-NEXT:    [[TMP27:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>
+; CHECK-NEXT:    [[TMP28:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP24]])
+; CHECK-NEXT:    [[TMP29:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP8]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = and i32 [[TMP28]], [[TMP29]]
+; CHECK-NEXT:    [[TMP30:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP27]])
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[TMP30]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = and i32 [[OP_RDX1]], [[TMP0]]
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = and i32 [[OP_RDX2]], [[TMP0]]
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = and i32 [[OP_RDX3]], [[TMP0]]
+; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[OP_RDX4]], i32 0
+; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <2 x i32> poison, i32 [[TMP26]], i32 0
+; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <2 x i32> [[TMP32]], i32 [[TMP26]], i32 1
+; CHECK-NEXT:    [[TMP34:%.*]] = and <2 x i32> [[TMP31]], [[TMP33]]
+; CHECK-NEXT:    [[TMP35:%.*]] = add <2 x i32> [[TMP31]], [[TMP33]]
+; CHECK-NEXT:    [[TMP36]] = shufflevector <2 x i32> [[TMP34]], <2 x i32> [[TMP35]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 ; FORCE_REDUCTION-LABEL: @Test(
 ; FORCE_REDUCTION-NEXT:  entry:
+; FORCE_REDUCTION-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0:%.*]], i32 0
+; FORCE_REDUCTION-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP0]], i32 1
+; FORCE_REDUCTION-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP0]], i32 2
+; FORCE_REDUCTION-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP0]], i32 3
+; FORCE_REDUCTION-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP0]], i32 4
+; FORCE_REDUCTION-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP0]], i32 5
+; FORCE_REDUCTION-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP0]], i32 6
+; FORCE_REDUCTION-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP0]], i32 7
+; FORCE_REDUCTION-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> poison, i32 [[TMP0]], i32 0
+; FORCE_REDUCTION-NEXT:    [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[TMP0]], i32 1
+; FORCE_REDUCTION-NEXT:    [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[TMP0]], i32 2
+; FORCE_REDUCTION-NEXT:    [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[TMP0]], i32 3
+; FORCE_REDUCTION-NEXT:    [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[TMP0]], i32 4
+; FORCE_REDUCTION-NEXT:    [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[TMP0]], i32 5
+; FORCE_REDUCTION-NEXT:    [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[TMP0]], i32 6
+; FORCE_REDUCTION-NEXT:    [[TMP16:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[TMP0]], i32 7
+; FORCE_REDUCTION-NEXT:    [[TMP17:%.*]] = insertelement <16 x i32> [[TMP16]], i32 [[TMP0]], i32 8
+; FORCE_REDUCTION-NEXT:    [[TMP18:%.*]] = insertelement <16 x i32> [[TMP17]], i32 [[TMP0]], i32 9
+; FORCE_REDUCTION-NEXT:    [[TMP19:%.*]] = insertelement <16 x i32> [[TMP18]], i32 [[TMP0]], i32 10
+; FORCE_REDUCTION-NEXT:    [[TMP20:%.*]] = insertelement <16 x i32> [[TMP19]], i32 [[TMP0]], i32 11
+; FORCE_REDUCTION-NEXT:    [[TMP21:%.*]] = insertelement <16 x i32> [[TMP20]], i32 [[TMP0]], i32 12
+; FORCE_REDUCTION-NEXT:    [[TMP22:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[TMP0]], i32 13
+; FORCE_REDUCTION-NEXT:    [[TMP23:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[TMP0]], i32 14
+; FORCE_REDUCTION-NEXT:    [[TMP24:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[TMP0]], i32 15
 ; FORCE_REDUCTION-NEXT:    br label [[LOOP:%.*]]
 ; FORCE_REDUCTION:       loop:
-; FORCE_REDUCTION-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP12:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
-; FORCE_REDUCTION-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
-; FORCE_REDUCTION-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
-; FORCE_REDUCTION-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240>
-; FORCE_REDUCTION-NEXT:    [[VAL_20:%.*]] = add i32 [[TMP2]], 1496
-; FORCE_REDUCTION-NEXT:    [[VAL_34:%.*]] = add i32 [[TMP2]], 8555
-; FORCE_REDUCTION-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP3]])
-; FORCE_REDUCTION-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]]
-; FORCE_REDUCTION-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0:%.*]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA1:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA2:%.*]] = and i32 [[OP_EXTRA1]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA2]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP2]]
-; FORCE_REDUCTION-NEXT:    [[VAL_39:%.*]] = add i32 [[TMP2]], 12529
-; FORCE_REDUCTION-NEXT:    [[VAL_40:%.*]] = and i32 [[OP_EXTRA27]], [[VAL_39]]
-; FORCE_REDUCTION-NEXT:    [[VAL_41:%.*]] = add i32 [[TMP2]], 13685
-; FORCE_REDUCTION-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[VAL_40]], i32 0
-; FORCE_REDUCTION-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1
-; FORCE_REDUCTION-NEXT:    [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[VAL_41]], i32 0
-; FORCE_REDUCTION-NEXT:    [[TMP10:%.*]] = and <2 x i32> [[TMP8]], [[TMP9]]
-; FORCE_REDUCTION-NEXT:    [[TMP11:%.*]] = add <2 x i32> [[TMP8]], [[TMP9]]
-; FORCE_REDUCTION-NEXT:    [[TMP12]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> <i32 0, i32 3>
+; FORCE_REDUCTION-NEXT:    [[TMP25:%.*]] = phi <2 x i32> [ [[TMP36:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; FORCE_REDUCTION-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP25]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; FORCE_REDUCTION-NEXT:    [[TMP26:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>
+; FORCE_REDUCTION-NEXT:    [[TMP27:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP24]])
+; FORCE_REDUCTION-NEXT:    [[TMP28:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP8]])
+; FORCE_REDUCTION-NEXT:    [[TMP29:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP26]])
+; FORCE_REDUCTION-NEXT:    [[OP_RDX13:%.*]] = and i32 [[TMP29]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_RDX14:%.*]] = and i32 [[OP_RDX13]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_RDX15:%.*]] = and i32 [[OP_RDX14]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_RDX16:%.*]] = and i32 [[OP_RDX15]], [[TMP27]]
+; FORCE_REDUCTION-NEXT:    [[OP_RDX17:%.*]] = and i32 [[OP_RDX16]], [[TMP28]]
+; FORCE_REDUCTION-NEXT:    [[TMP30:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[OP_RDX17]], i32 0
+; FORCE_REDUCTION-NEXT:    [[TMP31:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
+; FORCE_REDUCTION-NEXT:    [[TMP32:%.*]] = insertelement <2 x i32> poison, i32 [[TMP31]], i32 0
+; FORCE_REDUCTION-NEXT:    [[TMP33:%.*]] = insertelement <2 x i32> [[TMP32]], i32 [[TMP31]], i32 1
+; FORCE_REDUCTION-NEXT:    [[TMP34:%.*]] = and <2 x i32> [[TMP30]], [[TMP33]]
+; FORCE_REDUCTION-NEXT:    [[TMP35:%.*]] = add <2 x i32> [[TMP30]], [[TMP33]]
+; FORCE_REDUCTION-NEXT:    [[TMP36]] = shufflevector <2 x i32> [[TMP34]], <2 x i32> [[TMP35]], <2 x i32> <i32 0, i32 3>
 ; FORCE_REDUCTION-NEXT:    br label [[LOOP]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
index f878bda..ba0585f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
@@ -4,20 +4,20 @@
 define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
 ; CHECK-LABEL: @mainTest(
 ; CHECK-NEXT:  bci_15.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 31, i32 poison>, i32 [[PARAM:%.*]], i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 31>, i32 [[PARAM:%.*]], i32 0
 ; CHECK-NEXT:    br label [[BCI_15:%.*]]
 ; CHECK:       bci_15:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = add <16 x i32> [[SHUFFLE]], <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 -1>
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = add <16 x i32> [[SHUFFLE]], <i32 -1, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0
 ; CHECK-NEXT:    store atomic i32 [[TMP4]], i32* [[VALS:%.*]] unordered, align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP3]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = and i32 [[TMP5]], [[TMP2]]
 ; CHECK-NEXT:    [[V44:%.*]] = add i32 [[TMP2]], 16
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[V44]], i32 0
-; CHECK-NEXT:    [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[OP_RDX]], i32 0
+; CHECK-NEXT:    [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[V44]], i32 1
 ; CHECK-NEXT:    br i1 true, label [[BCI_15]], label [[LOOPEXIT:%.*]]
 ; CHECK:       loopexit:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll
index e911d9c..9a9e6b4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll
@@ -60,7 +60,7 @@ define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) {
 
 define i8 @i(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @i(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]], <4 x i32> <i32 2, i32 1, i32 7, i32 4>
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP2]])
 ; CHECK-NEXT:    ret i8 [[TMP3]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
index e3ee84f..16da1ee 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
@@ -60,7 +60,7 @@ define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) {
 
 define i8 @i(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @i(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]], <4 x i32> <i32 2, i32 1, i32 7, i32 4>
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP2]])
 ; CHECK-NEXT:    ret i8 [[TMP3]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
index 8db775a..3178b34 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
@@ -140,6 +140,32 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
   ret <8 x i16> %r07
 }
 
+; PR41892
+define void @test_v4f32_v2f32_store(<4 x float> %f, float* %p){
+; CHECK-LABEL: @test_v4f32_v2f32_store(
+; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
+; CHECK-NEXT:    [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1
+; CHECK-NEXT:    [[ADD01:%.*]] = fadd float [[X0]], [[X1]]
+; CHECK-NEXT:    store float [[ADD01]], float* [[P:%.*]], align 4
+; CHECK-NEXT:    [[X2:%.*]] = extractelement <4 x float> [[F]], i64 2
+; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x float> [[F]], i64 3
+; CHECK-NEXT:    [[ADD23:%.*]] = fadd float [[X2]], [[X3]]
+; CHECK-NEXT:    [[P23:%.*]] = getelementptr inbounds float, float* [[P]], i64 1
+; CHECK-NEXT:    store float [[ADD23]], float* [[P23]], align 4
+; CHECK-NEXT:    ret void
+;
+  %x0 = extractelement <4 x float> %f, i64 0
+  %x1 = extractelement <4 x float> %f, i64 1
+  %add01 = fadd float %x0, %x1
+  store float %add01, float* %p, align 4
+  %x2 = extractelement <4 x float> %f, i64 2
+  %x3 = extractelement <4 x float> %f, i64 3
+  %add23 = fadd float %x2, %x3
+  %p23 = getelementptr inbounds float, float* %p, i64 1
+  store float %add23, float * %p23, align 4
+  ret void
+}
+
 ;
 ; 256-bit vectors
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
index 3f332c1..ce2aa23 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
@@ -140,6 +140,32 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
   ret <8 x i16> %r07
 }
 
+; PR41892
+define void @test_v4f32_v2f32_store(<4 x float> %f, float* %p){
+; CHECK-LABEL: @test_v4f32_v2f32_store(
+; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
+; CHECK-NEXT:    [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1
+; CHECK-NEXT:    [[ADD01:%.*]] = fadd float [[X0]], [[X1]]
+; CHECK-NEXT:    store float [[ADD01]], float* [[P:%.*]], align 4
+; CHECK-NEXT:    [[X2:%.*]] = extractelement <4 x float> [[F]], i64 2
+; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x float> [[F]], i64 3
+; CHECK-NEXT:    [[ADD23:%.*]] = fadd float [[X2]], [[X3]]
+; CHECK-NEXT:    [[P23:%.*]] = getelementptr inbounds float, float* [[P]], i64 1
+; CHECK-NEXT:    store float [[ADD23]], float* [[P23]], align 4
+; CHECK-NEXT:    ret void
+;
+  %x0 = extractelement <4 x float> %f, i64 0
+  %x1 = extractelement <4 x float> %f, i64 1
+  %add01 = fadd float %x0, %x1
+  store float %add01, float* %p, align 4
+  %x2 = extractelement <4 x float> %f, i64 2
+  %x3 = extractelement <4 x float> %f, i64 3
+  %add23 = fadd float %x2, %x3
+  %p23 = getelementptr inbounds float, float* %p, i64 1
+  store float %add23, float * %p23, align 4
+  ret void
+}
+
 ;
 ; 256-bit vectors
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
index d9ca7d5..048c567 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
@@ -13,58 +13,30 @@ define float @baz() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x float> poison, float [[TMP10]], i32 0
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <8 x float> [[TMP11]], float [[TMP9]], i32 1
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x float> [[TMP12]], float [[TMP5]], i32 2
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <8 x float> [[TMP13]], float [[TMP4]], i32 3
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[TMP10]], i32 4
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <8 x float> [[TMP15]], float [[TMP9]], i32 5
-; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <8 x float> [[TMP16]], float [[TMP5]], i32 6
-; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <8 x float> [[TMP17]], float [[TMP4]], i32 7
-; CHECK-NEXT:    [[TMP19:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP18]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP19]], [[CONV]]
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
-; CHECK-NEXT:    store float [[OP_EXTRA1]], float* @res, align 4
-; CHECK-NEXT:    ret float [[OP_EXTRA1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[SHUFFLE]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]]
+; CHECK-NEXT:    store float [[OP_RDX1]], float* @res, align 4
+; CHECK-NEXT:    ret float [[OP_RDX1]]
 ;
 ; THRESHOLD-LABEL: @baz(
 ; THRESHOLD-NEXT:  entry:
 ; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
 ; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
 ; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
-; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
-; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
-; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
-; THRESHOLD-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
-; THRESHOLD-NEXT:    [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
-; THRESHOLD-NEXT:    [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
-; THRESHOLD-NEXT:    [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
-; THRESHOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
-; THRESHOLD-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
-; THRESHOLD-NEXT:    [[TMP11:%.*]] = insertelement <8 x float> poison, float [[TMP10]], i32 0
-; THRESHOLD-NEXT:    [[TMP12:%.*]] = insertelement <8 x float> [[TMP11]], float [[TMP9]], i32 1
-; THRESHOLD-NEXT:    [[TMP13:%.*]] = insertelement <8 x float> [[TMP12]], float [[TMP5]], i32 2
-; THRESHOLD-NEXT:    [[TMP14:%.*]] = insertelement <8 x float> [[TMP13]], float [[TMP4]], i32 3
-; THRESHOLD-NEXT:    [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[TMP10]], i32 4
-; THRESHOLD-NEXT:    [[TMP16:%.*]] = insertelement <8 x float> [[TMP15]], float [[TMP9]], i32 5
-; THRESHOLD-NEXT:    [[TMP17:%.*]] = insertelement <8 x float> [[TMP16]], float [[TMP5]], i32 6
-; THRESHOLD-NEXT:    [[TMP18:%.*]] = insertelement <8 x float> [[TMP17]], float [[TMP4]], i32 7
-; THRESHOLD-NEXT:    [[TMP19:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP18]])
-; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP19]], [[CONV]]
-; THRESHOLD-NEXT:    [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
-; THRESHOLD-NEXT:    store float [[OP_EXTRA1]], float* @res, align 4
-; THRESHOLD-NEXT:    ret float [[OP_EXTRA1]]
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
+; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
+; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
+; THRESHOLD-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[SHUFFLE]])
+; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
+; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]]
+; THRESHOLD-NEXT:    store float [[OP_RDX1]], float* @res, align 4
+; THRESHOLD-NEXT:    ret float [[OP_RDX1]]
 ;
 entry:
   %0 = load i32, i32* @n, align 4
@@ -107,10 +79,10 @@ define float @bazz() {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
-; CHECK-NEXT:    store float [[OP_EXTRA1]], float* @res, align 4
-; CHECK-NEXT:    ret float [[OP_EXTRA1]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV6]]
+; CHECK-NEXT:    store float [[OP_RDX1]], float* @res, align 4
+; CHECK-NEXT:    ret float [[OP_RDX1]]
 ;
 ; THRESHOLD-LABEL: @bazz(
 ; THRESHOLD-NEXT:  entry:
@@ -123,10 +95,10 @@ define float @bazz() {
 ; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
 ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
 ; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]])
-; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
-; THRESHOLD-NEXT:    [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
-; THRESHOLD-NEXT:    store float [[OP_EXTRA1]], float* @res, align 4
-; THRESHOLD-NEXT:    ret float [[OP_EXTRA1]]
+; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
+; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV6]]
+; THRESHOLD-NEXT:    store float [[OP_RDX1]], float* @res, align 4
+; THRESHOLD-NEXT:    ret float [[OP_RDX1]]
 ;
 entry:
   %0 = load i32, i32* @n, align 4
@@ -343,25 +315,25 @@ entry:
 define float @f(float* nocapture readonly %x) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <16 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP3]])
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_32]] to <16 x float>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x float>, <16 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP3]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
 ; CHECK-NEXT:    ret float [[OP_RDX]]
 ;
 ; THRESHOLD-LABEL: @f(
 ; THRESHOLD-NEXT:  entry:
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <16 x float>*
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
-; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
-; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
-; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
-; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP3]])
-; THRESHOLD-NEXT:    [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]])
+; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>*
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
+; THRESHOLD-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32
+; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_32]] to <16 x float>*
+; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <16 x float>, <16 x float>* [[TMP2]], align 4
+; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]])
+; THRESHOLD-NEXT:    [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP3]])
 ; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
 ; THRESHOLD-NEXT:    ret float [[OP_RDX]]
 ;
@@ -519,8 +491,8 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
-; CHECK-NEXT:    ret float [[OP_EXTRA]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]]
+; CHECK-NEXT:    ret float [[OP_RDX]]
 ;
 ; THRESHOLD-LABEL: @f1(
 ; THRESHOLD-NEXT:  entry:
@@ -529,8 +501,8 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
 ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>*
 ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
 ; THRESHOLD-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]])
-; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
-; THRESHOLD-NEXT:    ret float [[OP_EXTRA]]
+; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]]
+; THRESHOLD-NEXT:    ret float [[OP_RDX]]
 ;
   entry:
   %rem = srem i32 %a, %b
@@ -637,50 +609,50 @@ define float @loadadd31(float* nocapture readonly %x) {
 ; CHECK-LABEL: @loadadd31(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP7]])
-; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP5]])
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[ARRAYIDX]] to <16 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <8 x float>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_24]] to <4 x float>*
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
+; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[ARRAYIDX_28]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
+; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX_29]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
-; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
+; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
-; CHECK-NEXT:    [[TMP11:%.*]] = fadd fast float [[OP_RDX1]], [[TMP1]]
-; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
-; CHECK-NEXT:    ret float [[TMP12]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP6]]
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP7]]
+; CHECK-NEXT:    ret float [[OP_RDX3]]
 ;
 ; THRESHOLD-LABEL: @loadadd31(
 ; THRESHOLD-NEXT:  entry:
 ; THRESHOLD-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
-; THRESHOLD-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
-; THRESHOLD-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
-; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>*
-; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
-; THRESHOLD-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
-; THRESHOLD-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>*
-; THRESHOLD-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
-; THRESHOLD-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
-; THRESHOLD-NEXT:    [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
-; THRESHOLD-NEXT:    [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4
-; THRESHOLD-NEXT:    [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP7]])
-; THRESHOLD-NEXT:    [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP5]])
+; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[ARRAYIDX]] to <16 x float>*
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
+; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
+; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <8 x float>*
+; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* [[TMP2]], align 4
+; THRESHOLD-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
+; THRESHOLD-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_24]] to <4 x float>*
+; THRESHOLD-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
+; THRESHOLD-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
+; THRESHOLD-NEXT:    [[TMP6:%.*]] = load float, float* [[ARRAYIDX_28]], align 4
+; THRESHOLD-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
+; THRESHOLD-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX_29]], align 4
+; THRESHOLD-NEXT:    [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]])
+; THRESHOLD-NEXT:    [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]])
 ; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
-; THRESHOLD-NEXT:    [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
+; THRESHOLD-NEXT:    [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
 ; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
-; THRESHOLD-NEXT:    [[TMP11:%.*]] = fadd fast float [[OP_RDX1]], [[TMP1]]
-; THRESHOLD-NEXT:    [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
-; THRESHOLD-NEXT:    ret float [[TMP12]]
+; THRESHOLD-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP6]]
+; THRESHOLD-NEXT:    [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP7]]
+; THRESHOLD-NEXT:    ret float [[OP_RDX3]]
 ;
   entry:
   %arrayidx = getelementptr inbounds float, float* %x, i64 1
@@ -780,25 +752,25 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
-; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
-; CHECK-NEXT:    ret float [[OP_EXTRA1]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], 3.000000e+00
+; CHECK-NEXT:    ret float [[OP_RDX2]]
 ;
 ; THRESHOLD-LABEL: @extra_args(
 ; THRESHOLD-NEXT:  entry:
 ; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
 ; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
-; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
 ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>*
 ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
 ; THRESHOLD-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
-; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
-; THRESHOLD-NEXT:    [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
-; THRESHOLD-NEXT:    ret float [[OP_EXTRA1]]
+; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]]
+; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]]
+; THRESHOLD-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], 3.000000e+00
+; THRESHOLD-NEXT:    ret float [[OP_RDX2]]
 ;
   entry:
   %mul = mul nsw i32 %b, %a
@@ -836,29 +808,29 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
-; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
-; CHECK-NEXT:    [[OP_EXTRA2:%.*]] = fadd fast float [[OP_EXTRA1]], 5.000000e+00
-; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = fadd fast float [[OP_EXTRA2]], [[CONV]]
-; CHECK-NEXT:    ret float [[OP_EXTRA3]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], 5.000000e+00
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 5.000000e+00
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], 3.000000e+00
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[CONV]]
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = fadd fast float [[OP_RDX3]], [[CONV]]
+; CHECK-NEXT:    ret float [[OP_RDX4]]
 ;
 ; THRESHOLD-LABEL: @extra_args_same_several_times(
 ; THRESHOLD-NEXT:  entry:
 ; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
 ; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
-; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
 ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>*
 ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
 ; THRESHOLD-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
-; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
-; THRESHOLD-NEXT:    [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
-; THRESHOLD-NEXT:    [[OP_EXTRA2:%.*]] = fadd fast float [[OP_EXTRA1]], 5.000000e+00
-; THRESHOLD-NEXT:    [[OP_EXTRA3:%.*]] = fadd fast float [[OP_EXTRA2]], [[CONV]]
-; THRESHOLD-NEXT:    ret float [[OP_EXTRA3]]
+; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], 5.000000e+00
+; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 5.000000e+00
+; THRESHOLD-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], 3.000000e+00
+; THRESHOLD-NEXT:    [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[CONV]]
+; THRESHOLD-NEXT:    [[OP_RDX4:%.*]] = fadd fast float [[OP_RDX3]], [[CONV]]
+; THRESHOLD-NEXT:    ret float [[OP_RDX4]]
 ;
   entry:
   %mul = mul nsw i32 %b, %a
@@ -899,28 +871,28 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
 ; CHECK-NEXT:    [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
-; CHECK-NEXT:    [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00
-; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
-; CHECK-NEXT:    ret float [[OP_EXTRA1]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[CONVC]]
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], 3.000000e+00
+; CHECK-NEXT:    ret float [[OP_RDX3]]
 ;
 ; THRESHOLD-LABEL: @extra_args_no_replace(
 ; THRESHOLD-NEXT:  entry:
 ; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
 ; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
 ; THRESHOLD-NEXT:    [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
-; THRESHOLD-NEXT:    [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00
-; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]
 ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>*
 ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
 ; THRESHOLD-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
-; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
-; THRESHOLD-NEXT:    [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
-; THRESHOLD-NEXT:    ret float [[OP_EXTRA1]]
+; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]]
+; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]]
+; THRESHOLD-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[CONVC]]
+; THRESHOLD-NEXT:    [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], 3.000000e+00
+; THRESHOLD-NEXT:    ret float [[OP_RDX3]]
 ;
   entry:
   %mul = mul nsw i32 %b, %a
@@ -1019,9 +991,9 @@ define i32 @wobble(i32 %arg, i32 %bar) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP6]], [[ARG]]
-; CHECK-NEXT:    [[OP_EXTRA2:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP3]]
-; CHECK-NEXT:    ret i32 [[OP_EXTRA2]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = add nsw i32 [[TMP6]], [[TMP3]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = add nuw i32 [[OP_RDX]], [[ARG]]
+; CHECK-NEXT:    ret i32 [[OP_RDX2]]
 ;
 ; THRESHOLD-LABEL: @wobble(
 ; THRESHOLD-NEXT:  bb:
@@ -1034,9 +1006,9 @@ define i32 @wobble(i32 %arg, i32 %bar) {
 ; THRESHOLD-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
 ; THRESHOLD-NEXT:    [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32>
 ; THRESHOLD-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
-; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP6]], [[ARG]]
-; THRESHOLD-NEXT:    [[OP_EXTRA2:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP3]]
-; THRESHOLD-NEXT:    ret i32 [[OP_EXTRA2]]
+; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = add nsw i32 [[TMP6]], [[TMP3]]
+; THRESHOLD-NEXT:    [[OP_RDX2:%.*]] = add nuw i32 [[OP_RDX]], [[ARG]]
+; THRESHOLD-NEXT:    ret i32 [[OP_RDX2]]
 ;
   bb:
   %x1 = xor i32 %arg, %bar
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
index ef45de7..470fa0c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
@@ -440,23 +440,11 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
 ;
 ; MINTREESIZE-LABEL: @reschedule_extract(
 ; MINTREESIZE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
-; MINTREESIZE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2
-; MINTREESIZE-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1
-; MINTREESIZE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0
-; MINTREESIZE-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
-; MINTREESIZE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2
-; MINTREESIZE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1
-; MINTREESIZE-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0
-; MINTREESIZE-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0
-; MINTREESIZE-NEXT:    [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1
-; MINTREESIZE-NEXT:    [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
-; MINTREESIZE-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1
-; MINTREESIZE-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0
-; MINTREESIZE-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1
-; MINTREESIZE-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
-; MINTREESIZE-NEXT:    [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT:    [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT:    ret <4 x float> [[TMP17]]
+; MINTREESIZE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
+; MINTREESIZE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; MINTREESIZE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT:    ret <4 x float> [[TMP5]]
 ;
   %a0 = extractelement <4 x float> %a, i32 0
   %b0 = extractelement <4 x float> %b, i32 0
@@ -490,23 +478,11 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
 ;
 ; MINTREESIZE-LABEL: @take_credit(
 ; MINTREESIZE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
-; MINTREESIZE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2
-; MINTREESIZE-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1
-; MINTREESIZE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0
-; MINTREESIZE-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
-; MINTREESIZE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2
-; MINTREESIZE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1
-; MINTREESIZE-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0
-; MINTREESIZE-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0
-; MINTREESIZE-NEXT:    [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1
-; MINTREESIZE-NEXT:    [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
-; MINTREESIZE-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1
-; MINTREESIZE-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0
-; MINTREESIZE-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1
-; MINTREESIZE-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
-; MINTREESIZE-NEXT:    [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT:    [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT:    ret <4 x float> [[TMP17]]
+; MINTREESIZE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
+; MINTREESIZE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; MINTREESIZE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT:    ret <4 x float> [[TMP5]]
 ;
   %a0 = extractelement <4 x float> %a, i32 0
   %b0 = extractelement <4 x float> %b, i32 0
@@ -564,39 +540,11 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr
 ;
 ; MINTREESIZE-LABEL: @_vadd256(
 ; MINTREESIZE-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7
-; MINTREESIZE-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[B]], i32 6
-; MINTREESIZE-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[B]], i32 5
-; MINTREESIZE-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[B]], i32 4
-; MINTREESIZE-NEXT:    [[TMP5:%.*]] = extractelement <8 x float> [[B]], i32 3
-; MINTREESIZE-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[B]], i32 2
-; MINTREESIZE-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> [[B]], i32 1
-; MINTREESIZE-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[B]], i32 0
-; MINTREESIZE-NEXT:    [[TMP9:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7
-; MINTREESIZE-NEXT:    [[TMP10:%.*]] = extractelement <8 x float> [[A]], i32 6
-; MINTREESIZE-NEXT:    [[TMP11:%.*]] = extractelement <8 x float> [[A]], i32 5
-; MINTREESIZE-NEXT:    [[TMP12:%.*]] = extractelement <8 x float> [[A]], i32 4
-; MINTREESIZE-NEXT:    [[TMP13:%.*]] = extractelement <8 x float> [[A]], i32 3
-; MINTREESIZE-NEXT:    [[TMP14:%.*]] = extractelement <8 x float> [[A]], i32 2
-; MINTREESIZE-NEXT:    [[TMP15:%.*]] = extractelement <8 x float> [[A]], i32 1
-; MINTREESIZE-NEXT:    [[TMP16:%.*]] = extractelement <8 x float> [[A]], i32 0
-; MINTREESIZE-NEXT:    [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP16]], i32 0
-; MINTREESIZE-NEXT:    [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP8]], i32 1
-; MINTREESIZE-NEXT:    [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0
-; MINTREESIZE-NEXT:    [[TMP20:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP7]], i32 1
-; MINTREESIZE-NEXT:    [[TMP21:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i32 0
-; MINTREESIZE-NEXT:    [[TMP22:%.*]] = insertelement <2 x float> [[TMP21]], float [[TMP6]], i32 1
-; MINTREESIZE-NEXT:    [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0
-; MINTREESIZE-NEXT:    [[TMP24:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP5]], i32 1
-; MINTREESIZE-NEXT:    [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0
-; MINTREESIZE-NEXT:    [[TMP26:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP4]], i32 1
-; MINTREESIZE-NEXT:    [[TMP27:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i32 0
-; MINTREESIZE-NEXT:    [[TMP28:%.*]] = insertelement <2 x float> [[TMP27]], float [[TMP3]], i32 1
-; MINTREESIZE-NEXT:    [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i32 0
-; MINTREESIZE-NEXT:    [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP2]], i32 1
-; MINTREESIZE-NEXT:    [[TMP31:%.*]] = insertelement <2 x float> poison, float [[TMP9]], i32 0
-; MINTREESIZE-NEXT:    [[TMP32:%.*]] = insertelement <2 x float> [[TMP31]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT:    [[TMP33:%.*]] = fadd <8 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT:    ret <8 x float> [[TMP33]]
+; MINTREESIZE-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7
+; MINTREESIZE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; MINTREESIZE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT:    [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT:    ret <8 x float> [[TMP5]]
 ;
   %vecext = extractelement <8 x float> %a, i32 0
   %vecext1 = extractelement <8 x float> %b, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index e42bc3b..d7adf19 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -475,23 +475,11 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
 ;
 ; MINTREESIZE-LABEL: @reschedule_extract(
 ; MINTREESIZE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
-; MINTREESIZE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2
-; MINTREESIZE-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1
-; MINTREESIZE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0
-; MINTREESIZE-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
-; MINTREESIZE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2
-; MINTREESIZE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1
-; MINTREESIZE-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0
-; MINTREESIZE-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0
-; MINTREESIZE-NEXT:    [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1
-; MINTREESIZE-NEXT:    [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
-; MINTREESIZE-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1
-; MINTREESIZE-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0
-; MINTREESIZE-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1
-; MINTREESIZE-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
-; MINTREESIZE-NEXT:    [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT:    [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT:    ret <4 x float> [[TMP17]]
+; MINTREESIZE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
+; MINTREESIZE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; MINTREESIZE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT:    ret <4 x float> [[TMP5]]
 ;
   %a0 = extractelement <4 x float> %a, i32 0
   %b0 = extractelement <4 x float> %b, i32 0
@@ -525,23 +513,11 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
 ;
 ; MINTREESIZE-LABEL: @take_credit(
 ; MINTREESIZE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
-; MINTREESIZE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2
-; MINTREESIZE-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1
-; MINTREESIZE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0
-; MINTREESIZE-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
-; MINTREESIZE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2
-; MINTREESIZE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1
-; MINTREESIZE-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0
-; MINTREESIZE-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0
-; MINTREESIZE-NEXT:    [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1
-; MINTREESIZE-NEXT:    [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
-; MINTREESIZE-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1
-; MINTREESIZE-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0
-; MINTREESIZE-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1
-; MINTREESIZE-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
-; MINTREESIZE-NEXT:    [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT:    [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT:    ret <4 x float> [[TMP17]]
+; MINTREESIZE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
+; MINTREESIZE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; MINTREESIZE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT:    ret <4 x float> [[TMP5]]
 ;
   %a0 = extractelement <4 x float> %a, i32 0
   %b0 = extractelement <4 x float> %b, i32 0
@@ -599,39 +575,11 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr
 ;
 ; MINTREESIZE-LABEL: @_vadd256(
 ; MINTREESIZE-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7
-; MINTREESIZE-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[B]], i32 6
-; MINTREESIZE-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[B]], i32 5
-; MINTREESIZE-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[B]], i32 4
-; MINTREESIZE-NEXT:    [[TMP5:%.*]] = extractelement <8 x float> [[B]], i32 3
-; MINTREESIZE-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[B]], i32 2
-; MINTREESIZE-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> [[B]], i32 1
-; MINTREESIZE-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[B]], i32 0
-; MINTREESIZE-NEXT:    [[TMP9:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7
-; MINTREESIZE-NEXT:    [[TMP10:%.*]] = extractelement <8 x float> [[A]], i32 6
-; MINTREESIZE-NEXT:    [[TMP11:%.*]] = extractelement <8 x float> [[A]], i32 5
-; MINTREESIZE-NEXT:    [[TMP12:%.*]] = extractelement <8 x float> [[A]], i32 4
-; MINTREESIZE-NEXT:    [[TMP13:%.*]] = extractelement <8 x float> [[A]], i32 3
-; MINTREESIZE-NEXT:    [[TMP14:%.*]] = extractelement <8 x float> [[A]], i32 2
-; MINTREESIZE-NEXT:    [[TMP15:%.*]] = extractelement <8 x float> [[A]], i32 1
-; MINTREESIZE-NEXT:    [[TMP16:%.*]] = extractelement <8 x float> [[A]], i32 0
-; MINTREESIZE-NEXT:    [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP16]], i32 0
-; MINTREESIZE-NEXT:    [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP8]], i32 1
-; MINTREESIZE-NEXT:    [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0
-; MINTREESIZE-NEXT:    [[TMP20:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP7]], i32 1
-; MINTREESIZE-NEXT:    [[TMP21:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i32 0
-; MINTREESIZE-NEXT:    [[TMP22:%.*]] = insertelement <2 x float> [[TMP21]], float [[TMP6]], i32 1
-; MINTREESIZE-NEXT:    [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0
-; MINTREESIZE-NEXT:    [[TMP24:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP5]], i32 1
-; MINTREESIZE-NEXT:    [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0
-; MINTREESIZE-NEXT:    [[TMP26:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP4]], i32 1
-; MINTREESIZE-NEXT:    [[TMP27:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i32 0
-; MINTREESIZE-NEXT:    [[TMP28:%.*]] = insertelement <2 x float> [[TMP27]], float [[TMP3]], i32 1
-; MINTREESIZE-NEXT:    [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i32 0
-; MINTREESIZE-NEXT:    [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP2]], i32 1
-; MINTREESIZE-NEXT:    [[TMP31:%.*]] = insertelement <2 x float> poison, float [[TMP9]], i32 0
-; MINTREESIZE-NEXT:    [[TMP32:%.*]] = insertelement <2 x float> [[TMP31]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT:    [[TMP33:%.*]] = fadd <8 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT:    ret <8 x float> [[TMP33]]
+; MINTREESIZE-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7
+; MINTREESIZE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; MINTREESIZE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT:    [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT:    ret <8 x float> [[TMP5]]
 ;
   %vecext = extractelement <8 x float> %a, i32 0
   %vecext1 = extractelement <8 x float> %b, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
index 0cfd620..4aa4ce1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
@@ -12,23 +12,25 @@ define void @test() #0 {
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB:%.*]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP18:%.*]], [[BB1]] ], [ undef, [[BB]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 undef, [[TMP]]
-; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[TMP3]], [[TMP]]
-; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], [[TMP]]
-; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP]]
-; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP6]], [[TMP]]
-; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP]]
-; CHECK-NEXT:    [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP]]
-; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP]]
-; CHECK-NEXT:    [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP]]
-; CHECK-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP]]
-; CHECK-NEXT:    [[TMP13:%.*]] = mul i32 [[TMP12]], [[TMP]]
-; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP]]
-; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP]]
-; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP]]
-; CHECK-NEXT:    [[TMP17:%.*]] = mul i32 [[TMP16]], [[TMP]]
-; CHECK-NEXT:    [[TMP18]] = mul i32 [[TMP17]], [[TMP]]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[BB1]] ], [ undef, [[BB]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[TMP]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[TMP]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[TMP]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[TMP]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[TMP]], i32 4
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[TMP]], i32 5
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[TMP]], i32 6
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[TMP]], i32 7
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[TMP]], i32 8
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[TMP]], i32 9
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[TMP]], i32 10
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[TMP]], i32 11
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[TMP]], i32 12
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[TMP]], i32 13
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[TMP]], i32 14
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[TMP]], i32 15
+; CHECK-NEXT:    [[TMP16:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP15]])
+; CHECK-NEXT:    [[OP_RDX]] = mul i32 [[TMP16]], undef
 ; CHECK-NEXT:    br label [[BB1]]
 ;
 bb:
@@ -63,23 +65,19 @@ define void @test_2(i8 addrspace(1)* %arg, i32 %arg1) #0 {
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ undef, [[BB:%.*]] ], [ undef, [[BB2]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ 0, [[BB]] ], [ undef, [[BB2]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP]], undef
-; CHECK-NEXT:    [[TMP5:%.*]] = add i32 undef, [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = add i32 undef, [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = add i32 undef, [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = add i32 undef, [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP]], [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = add i32 undef, [[TMP12]]
-; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[TMP]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = add i32 undef, [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = add i32 [[TMP]], [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = add i32 undef, [[TMP16]]
-; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = add i32 undef, [[TMP18]]
-; CHECK-NEXT:    call void @use(i32 [[TMP19]])
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[TMP]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[TMP]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP]], i32 4
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP]], i32 5
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP]], i32 6
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP]], i32 7
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], undef
+; CHECK-NEXT:    call void @use(i32 [[OP_RDX1]])
 ; CHECK-NEXT:    br label [[BB2]]
 ;
 bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
index 90a259f..472ec76 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
@@ -10,29 +10,28 @@ define i32 @bar() local_unnamed_addr {
 ; CHECK-NEXT:    [[SUB102_1:%.*]] = sub nsw i32 undef, undef
 ; CHECK-NEXT:    [[ADD78_2:%.*]] = add nsw i32 undef, undef
 ; CHECK-NEXT:    [[SUB102_3:%.*]] = sub nsw i32 undef, undef
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB102_3]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[SUB102_1]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD94_1]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[ADD78_1]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[SUB86_1]], i32 4
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[ADD78_2]], i32 5
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD78_1]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[ADD94_1]], i32 2
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_1]], i32 3
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SUB102_3]], i32 4
-; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> poison, <16 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 4>
-; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 21, i32 22, i32 7, i32 24, i32 25, i32 10, i32 27, i32 28, i32 13, i32 30, i32 31>
-; CHECK-NEXT:    [[TMP14:%.*]] = lshr <16 x i32> [[TMP13]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP15:%.*]] = and <16 x i32> [[TMP14]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP16:%.*]] = mul nuw <16 x i32> [[TMP15]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-; CHECK-NEXT:    [[TMP17:%.*]] = add <16 x i32> [[TMP16]], [[TMP13]]
-; CHECK-NEXT:    [[TMP18:%.*]] = xor <16 x i32> [[TMP17]], [[TMP16]]
-; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP18]])
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[TMP19]], 16
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB102_1]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[ADD94_1]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 4
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_1]], i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_3]], i32 4
+; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 4>
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]]
+; CHECK-NEXT:    [[TMP11:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]]
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <16 x i32> [[TMP12]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP14:%.*]] = and <16 x i32> [[TMP13]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
+; CHECK-NEXT:    [[TMP15:%.*]] = mul nuw <16 x i32> [[TMP14]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP16:%.*]] = add <16 x i32> [[TMP15]], [[TMP12]]
+; CHECK-NEXT:    [[TMP17:%.*]] = xor <16 x i32> [[TMP16]], [[TMP15]]
+; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP17]])
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[TMP18]], 16
 ; CHECK-NEXT:    [[ADD119:%.*]] = add nuw nsw i32 undef, [[SHR]]
 ; CHECK-NEXT:    [[SHR120:%.*]] = lshr i32 [[ADD119]], 1
 ; CHECK-NEXT:    ret i32 [[SHR120]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll
new file mode 100644
index 0000000..aa9d898
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll
@@ -0,0 +1,189 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown -slp-vectorizer -opaque-pointers -mcpu=x86-64    -S | FileCheck %s --check-prefixes=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -slp-vectorizer -opaque-pointers -mcpu=x86-64-v2 -S | FileCheck %s --check-prefixes=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -slp-vectorizer -opaque-pointers -mcpu=x86-64-v3 -S | FileCheck %s --check-prefixes=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -slp-vectorizer -opaque-pointers -mcpu=x86-64-v4 -S | FileCheck %s --check-prefixes=AVX
+
+define { i64, i64 } @compute_min(ptr nocapture noundef nonnull readonly align 2 dereferenceable(16) %x, ptr nocapture noundef nonnull readonly align 2 dereferenceable(16) %y) {
+; SSE-LABEL: @compute_min(
+; SSE-NEXT:  entry:
+; SSE-NEXT:    [[TMP0:%.*]] = load i16, ptr [[Y:%.*]], align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 2
+; SSE-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP0]], i16 [[TMP1]])
+; SSE-NEXT:    [[ARRAYIDX_I_I_1:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 1
+; SSE-NEXT:    [[ARRAYIDX_I_I10_1:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 1
+; SSE-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_1]], align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_I_I_1]], align 2
+; SSE-NEXT:    [[TMP5:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP3]], i16 [[TMP4]])
+; SSE-NEXT:    [[ARRAYIDX_I_I_2:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 2
+; SSE-NEXT:    [[ARRAYIDX_I_I10_2:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 2
+; SSE-NEXT:    [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_2]], align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX_I_I_2]], align 2
+; SSE-NEXT:    [[TMP8:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP6]], i16 [[TMP7]])
+; SSE-NEXT:    [[ARRAYIDX_I_I_3:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 3
+; SSE-NEXT:    [[ARRAYIDX_I_I10_3:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 3
+; SSE-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_3]], align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX_I_I_3]], align 2
+; SSE-NEXT:    [[TMP11:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP9]], i16 [[TMP10]])
+; SSE-NEXT:    [[ARRAYIDX_I_I_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 4
+; SSE-NEXT:    [[ARRAYIDX_I_I10_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 4
+; SSE-NEXT:    [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_4]], align 2
+; SSE-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX_I_I_4]], align 2
+; SSE-NEXT:    [[TMP14:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP12]], i16 [[TMP13]])
+; SSE-NEXT:    [[ARRAYIDX_I_I_5:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 5
+; SSE-NEXT:    [[ARRAYIDX_I_I10_5:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 5
+; SSE-NEXT:    [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_5]], align 2
+; SSE-NEXT:    [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX_I_I_5]], align 2
+; SSE-NEXT:    [[TMP17:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP15]], i16 [[TMP16]])
+; SSE-NEXT:    [[ARRAYIDX_I_I_6:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 6
+; SSE-NEXT:    [[ARRAYIDX_I_I10_6:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 6
+; SSE-NEXT:    [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_6]], align 2
+; SSE-NEXT:    [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX_I_I_6]], align 2
+; SSE-NEXT:    [[TMP20:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP18]], i16 [[TMP19]])
+; SSE-NEXT:    [[ARRAYIDX_I_I_7:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 7
+; SSE-NEXT:    [[ARRAYIDX_I_I10_7:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 7
+; SSE-NEXT:    [[TMP21:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_7]], align 2
+; SSE-NEXT:    [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX_I_I_7]], align 2
+; SSE-NEXT:    [[TMP23:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP21]], i16 [[TMP22]])
+; SSE-NEXT:    [[RETVAL_SROA_4_0_INSERT_EXT:%.*]] = zext i16 [[TMP11]] to i64
+; SSE-NEXT:    [[RETVAL_SROA_4_0_INSERT_SHIFT:%.*]] = shl nuw i64 [[RETVAL_SROA_4_0_INSERT_EXT]], 48
+; SSE-NEXT:    [[RETVAL_SROA_3_0_INSERT_EXT:%.*]] = zext i16 [[TMP8]] to i64
+; SSE-NEXT:    [[RETVAL_SROA_3_0_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[RETVAL_SROA_3_0_INSERT_EXT]], 32
+; SSE-NEXT:    [[RETVAL_SROA_3_0_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_4_0_INSERT_SHIFT]], [[RETVAL_SROA_3_0_INSERT_SHIFT]]
+; SSE-NEXT:    [[RETVAL_SROA_2_0_INSERT_EXT:%.*]] = zext i16 [[TMP5]] to i64
+; SSE-NEXT:    [[RETVAL_SROA_2_0_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[RETVAL_SROA_2_0_INSERT_EXT]], 16
+; SSE-NEXT:    [[RETVAL_SROA_2_0_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_3_0_INSERT_INSERT]], [[RETVAL_SROA_2_0_INSERT_SHIFT]]
+; SSE-NEXT:    [[RETVAL_SROA_0_0_INSERT_EXT:%.*]] = zext i16 [[TMP2]] to i64
+; SSE-NEXT:    [[RETVAL_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_2_0_INSERT_INSERT]], [[RETVAL_SROA_0_0_INSERT_EXT]]
+; SSE-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue { i64, i64 } poison, i64 [[RETVAL_SROA_0_0_INSERT_INSERT]], 0
+; SSE-NEXT:    [[RETVAL_SROA_9_8_INSERT_EXT:%.*]] = zext i16 [[TMP23]] to i64
+; SSE-NEXT:    [[RETVAL_SROA_9_8_INSERT_SHIFT:%.*]] = shl nuw i64 [[RETVAL_SROA_9_8_INSERT_EXT]], 48
+; SSE-NEXT:    [[RETVAL_SROA_8_8_INSERT_EXT:%.*]] = zext i16 [[TMP20]] to i64
+; SSE-NEXT:    [[RETVAL_SROA_8_8_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[RETVAL_SROA_8_8_INSERT_EXT]], 32
+; SSE-NEXT:    [[RETVAL_SROA_8_8_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_9_8_INSERT_SHIFT]], [[RETVAL_SROA_8_8_INSERT_SHIFT]]
+; SSE-NEXT:    [[RETVAL_SROA_7_8_INSERT_EXT:%.*]] = zext i16 [[TMP17]] to i64
+; SSE-NEXT:    [[RETVAL_SROA_7_8_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[RETVAL_SROA_7_8_INSERT_EXT]], 16
+; SSE-NEXT:    [[RETVAL_SROA_7_8_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_8_8_INSERT_INSERT]], [[RETVAL_SROA_7_8_INSERT_SHIFT]]
+; SSE-NEXT:    [[RETVAL_SROA_5_8_INSERT_EXT:%.*]] = zext i16 [[TMP14]] to i64
+; SSE-NEXT:    [[RETVAL_SROA_5_8_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_7_8_INSERT_INSERT]], [[RETVAL_SROA_5_8_INSERT_EXT]]
+; SSE-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue { i64, i64 } [[DOTFCA_0_INSERT]], i64 [[RETVAL_SROA_5_8_INSERT_INSERT]], 1
+; SSE-NEXT:    ret { i64, i64 } [[DOTFCA_1_INSERT]]
+;
+; AVX-LABEL: @compute_min(
+; AVX-NEXT:  entry:
+; AVX-NEXT:    [[TMP0:%.*]] = load i16, ptr [[Y:%.*]], align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 2
+; AVX-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP0]], i16 [[TMP1]])
+; AVX-NEXT:    [[ARRAYIDX_I_I_1:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 1
+; AVX-NEXT:    [[ARRAYIDX_I_I10_1:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 1
+; AVX-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_1]], align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_I_I_1]], align 2
+; AVX-NEXT:    [[TMP5:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP3]], i16 [[TMP4]])
+; AVX-NEXT:    [[ARRAYIDX_I_I_2:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 2
+; AVX-NEXT:    [[ARRAYIDX_I_I10_2:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 2
+; AVX-NEXT:    [[ARRAYIDX_I_I_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 4
+; AVX-NEXT:    [[ARRAYIDX_I_I10_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 4
+; AVX-NEXT:    [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_4]], align 2
+; AVX-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX_I_I_4]], align 2
+; AVX-NEXT:    [[TMP8:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP6]], i16 [[TMP7]])
+; AVX-NEXT:    [[ARRAYIDX_I_I_5:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 5
+; AVX-NEXT:    [[ARRAYIDX_I_I10_5:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 5
+; AVX-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_5]], align 2
+; AVX-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX_I_I_5]], align 2
+; AVX-NEXT:    [[TMP11:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP9]], i16 [[TMP10]])
+; AVX-NEXT:    [[ARRAYIDX_I_I_6:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 6
+; AVX-NEXT:    [[ARRAYIDX_I_I10_6:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 6
+; AVX-NEXT:    [[TMP12:%.*]] = load <2 x i16>, ptr [[ARRAYIDX_I_I10_2]], align 2
+; AVX-NEXT:    [[TMP13:%.*]] = load <2 x i16>, ptr [[ARRAYIDX_I_I_2]], align 2
+; AVX-NEXT:    [[TMP14:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP12]], <2 x i16> [[TMP13]])
+; AVX-NEXT:    [[TMP15:%.*]] = zext <2 x i16> [[TMP14]] to <2 x i64>
+; AVX-NEXT:    [[TMP16:%.*]] = shl nuw <2 x i64> [[TMP15]], <i64 32, i64 48>
+; AVX-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP16]], i32 0
+; AVX-NEXT:    [[TMP18:%.*]] = extractelement <2 x i64> [[TMP16]], i32 1
+; AVX-NEXT:    [[RETVAL_SROA_3_0_INSERT_INSERT:%.*]] = or i64 [[TMP18]], [[TMP17]]
+; AVX-NEXT:    [[RETVAL_SROA_2_0_INSERT_EXT:%.*]] = zext i16 [[TMP5]] to i64
+; AVX-NEXT:    [[RETVAL_SROA_2_0_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[RETVAL_SROA_2_0_INSERT_EXT]], 16
+; AVX-NEXT:    [[RETVAL_SROA_2_0_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_3_0_INSERT_INSERT]], [[RETVAL_SROA_2_0_INSERT_SHIFT]]
+; AVX-NEXT:    [[RETVAL_SROA_0_0_INSERT_EXT:%.*]] = zext i16 [[TMP2]] to i64
+; AVX-NEXT:    [[RETVAL_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_2_0_INSERT_INSERT]], [[RETVAL_SROA_0_0_INSERT_EXT]]
+; AVX-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue { i64, i64 } poison, i64 [[RETVAL_SROA_0_0_INSERT_INSERT]], 0
+; AVX-NEXT:    [[TMP19:%.*]] = load <2 x i16>, ptr [[ARRAYIDX_I_I10_6]], align 2
+; AVX-NEXT:    [[TMP20:%.*]] = load <2 x i16>, ptr [[ARRAYIDX_I_I_6]], align 2
+; AVX-NEXT:    [[TMP21:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP19]], <2 x i16> [[TMP20]])
+; AVX-NEXT:    [[TMP22:%.*]] = zext <2 x i16> [[TMP21]] to <2 x i64>
+; AVX-NEXT:    [[TMP23:%.*]] = shl nuw <2 x i64> [[TMP22]], <i64 32, i64 48>
+; AVX-NEXT:    [[TMP24:%.*]] = extractelement <2 x i64> [[TMP23]], i32 0
+; AVX-NEXT:    [[TMP25:%.*]] = extractelement <2 x i64> [[TMP23]], i32 1
+; AVX-NEXT:    [[RETVAL_SROA_8_8_INSERT_INSERT:%.*]] = or i64 [[TMP25]], [[TMP24]]
+; AVX-NEXT:    [[RETVAL_SROA_7_8_INSERT_EXT:%.*]] = zext i16 [[TMP11]] to i64
+; AVX-NEXT:    [[RETVAL_SROA_7_8_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[RETVAL_SROA_7_8_INSERT_EXT]], 16
+; AVX-NEXT:    [[RETVAL_SROA_7_8_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_8_8_INSERT_INSERT]], [[RETVAL_SROA_7_8_INSERT_SHIFT]]
+; AVX-NEXT:    [[RETVAL_SROA_5_8_INSERT_EXT:%.*]] = zext i16 [[TMP8]] to i64
+; AVX-NEXT:    [[RETVAL_SROA_5_8_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_7_8_INSERT_INSERT]], [[RETVAL_SROA_5_8_INSERT_EXT]]
+; AVX-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue { i64, i64 } [[DOTFCA_0_INSERT]], i64 [[RETVAL_SROA_5_8_INSERT_INSERT]], 1
+; AVX-NEXT:    ret { i64, i64 } [[DOTFCA_1_INSERT]]
+;
+entry:
+  %0 = load i16, ptr %y, align 2
+  %1 = load i16, ptr %x, align 2
+  %2 = tail call i16 @llvm.smin.i16(i16 %0, i16 %1)
+  %arrayidx.i.i.1 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 1
+  %arrayidx.i.i10.1 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 1
+  %3 = load i16, ptr %arrayidx.i.i10.1, align 2
+  %4 = load i16, ptr %arrayidx.i.i.1, align 2
+  %5 = tail call i16 @llvm.smin.i16(i16 %3, i16 %4)
+  %arrayidx.i.i.2 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 2
+  %arrayidx.i.i10.2 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 2
+  %6 = load i16, ptr %arrayidx.i.i10.2, align 2
+  %7 = load i16, ptr %arrayidx.i.i.2, align 2
+  %8 = tail call i16 @llvm.smin.i16(i16 %6, i16 %7)
+  %arrayidx.i.i.3 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 3
+  %arrayidx.i.i10.3 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 3
+  %9 = load i16, ptr %arrayidx.i.i10.3, align 2
+  %10 = load i16, ptr %arrayidx.i.i.3, align 2
+  %11 = tail call i16 @llvm.smin.i16(i16 %9, i16 %10)
+  %arrayidx.i.i.4 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 4
+  %arrayidx.i.i10.4 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 4
+  %12 = load i16, ptr %arrayidx.i.i10.4, align 2
+  %13 = load i16, ptr %arrayidx.i.i.4, align 2
+  %14 = tail call i16 @llvm.smin.i16(i16 %12, i16 %13)
+  %arrayidx.i.i.5 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 5
+  %arrayidx.i.i10.5 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 5
+  %15 = load i16, ptr %arrayidx.i.i10.5, align 2
+  %16 = load i16, ptr %arrayidx.i.i.5, align 2
+  %17 = tail call i16 @llvm.smin.i16(i16 %15, i16 %16)
+  %arrayidx.i.i.6 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 6
+  %arrayidx.i.i10.6 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 6
+  %18 = load i16, ptr %arrayidx.i.i10.6, align 2
+  %19 = load i16, ptr %arrayidx.i.i.6, align 2
+  %20 = tail call i16 @llvm.smin.i16(i16 %18, i16 %19)
+  %arrayidx.i.i.7 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 7
+  %arrayidx.i.i10.7 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 7
+  %21 = load i16, ptr %arrayidx.i.i10.7, align 2
+  %22 = load i16, ptr %arrayidx.i.i.7, align 2
+  %23 = tail call i16 @llvm.smin.i16(i16 %21, i16 %22)
+  %retval.sroa.4.0.insert.ext = zext i16 %11 to i64
+  %retval.sroa.4.0.insert.shift = shl nuw i64 %retval.sroa.4.0.insert.ext, 48
+  %retval.sroa.3.0.insert.ext = zext i16 %8 to i64
+  %retval.sroa.3.0.insert.shift = shl nuw nsw i64 %retval.sroa.3.0.insert.ext, 32
+  %retval.sroa.3.0.insert.insert = or i64 %retval.sroa.4.0.insert.shift, %retval.sroa.3.0.insert.shift
+  %retval.sroa.2.0.insert.ext = zext i16 %5 to i64
+  %retval.sroa.2.0.insert.shift = shl nuw nsw i64 %retval.sroa.2.0.insert.ext, 16
+  %retval.sroa.2.0.insert.insert = or i64 %retval.sroa.3.0.insert.insert, %retval.sroa.2.0.insert.shift
+  %retval.sroa.0.0.insert.ext = zext i16 %2 to i64
+  %retval.sroa.0.0.insert.insert = or i64 %retval.sroa.2.0.insert.insert, %retval.sroa.0.0.insert.ext
+  %.fca.0.insert = insertvalue { i64, i64 } poison, i64 %retval.sroa.0.0.insert.insert, 0
+  %retval.sroa.9.8.insert.ext = zext i16 %23 to i64
+  %retval.sroa.9.8.insert.shift = shl nuw i64 %retval.sroa.9.8.insert.ext, 48
+  %retval.sroa.8.8.insert.ext = zext i16 %20 to i64
+  %retval.sroa.8.8.insert.shift = shl nuw nsw i64 %retval.sroa.8.8.insert.ext, 32
+  %retval.sroa.8.8.insert.insert = or i64 %retval.sroa.9.8.insert.shift, %retval.sroa.8.8.insert.shift
+  %retval.sroa.7.8.insert.ext = zext i16 %17 to i64
+  %retval.sroa.7.8.insert.shift = shl nuw nsw i64 %retval.sroa.7.8.insert.ext, 16
+  %retval.sroa.7.8.insert.insert = or i64 %retval.sroa.8.8.insert.insert, %retval.sroa.7.8.insert.shift
+  %retval.sroa.5.8.insert.ext = zext i16 %14 to i64
+  %retval.sroa.5.8.insert.insert = or i64 %retval.sroa.7.8.insert.insert, %retval.sroa.5.8.insert.ext
+  %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.5.8.insert.insert, 1
+  ret { i64, i64 } %.fca.1.insert
+}
+declare i16 @llvm.smin.i16(i16, i16)
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
index 460f222..5769a29 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
@@ -230,17 +230,12 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
 ; CHECK-LABEL: @logical_and_icmp_clamp(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP3:%.*]] = freeze <4 x i1> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP3]])
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
-; CHECK-NEXT:    [[S4:%.*]] = select i1 [[TMP4]], i1 [[TMP5]], i1 false
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
-; CHECK-NEXT:    [[S5:%.*]] = select i1 [[S4]], i1 [[TMP6]], i1 false
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
-; CHECK-NEXT:    [[S6:%.*]] = select i1 [[S5]], i1 [[TMP7]], i1 false
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
-; CHECK-NEXT:    [[S7:%.*]] = select i1 [[S6]], i1 [[TMP8]], i1 false
-; CHECK-NEXT:    ret i1 [[S7]]
+; CHECK-NEXT:    [[TMP5:%.*]] = freeze <4 x i1> [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP4]], i1 [[TMP6]], i1 false
+; CHECK-NEXT:    ret i1 [[OP_RDX]]
 ;
   %x0 = extractelement <4 x i32> %x, i32 0
   %x1 = extractelement <4 x i32> %x, i32 1
@@ -265,53 +260,17 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
 }
 
 define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) {
-; SSE-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
-; SSE-NEXT:    [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
-; SSE-NEXT:    [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
-; SSE-NEXT:    [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
-; SSE-NEXT:    [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
-; SSE-NEXT:    [[C2:%.*]] = icmp slt i32 [[X2]], 42
-; SSE-NEXT:    call void @use1(i1 [[C2]])
-; SSE-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X0]], i32 0
-; SSE-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X1]], i32 1
-; SSE-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[X3]], i32 2
-; SSE-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X0]], i32 3
-; SSE-NEXT:    [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 17>
-; SSE-NEXT:    [[TMP6:%.*]] = icmp sgt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 17>
-; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; SSE-NEXT:    [[D1:%.*]] = icmp sgt i32 [[X1]], 17
-; SSE-NEXT:    [[D2:%.*]] = icmp sgt i32 [[X2]], 17
-; SSE-NEXT:    [[D3:%.*]] = icmp sgt i32 [[X3]], 17
-; SSE-NEXT:    [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]]
-; SSE-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP8]])
-; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP9]], i1 [[C2]], i1 false
-; SSE-NEXT:    [[S5:%.*]] = select i1 [[OP_EXTRA]], i1 [[D1]], i1 false
-; SSE-NEXT:    [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
-; SSE-NEXT:    [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
-; SSE-NEXT:    ret i1 [[S7]]
-;
-; AVX-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
-; AVX-NEXT:    [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
-; AVX-NEXT:    [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
-; AVX-NEXT:    [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
-; AVX-NEXT:    [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
-; AVX-NEXT:    [[C0:%.*]] = icmp slt i32 [[X0]], 42
-; AVX-NEXT:    [[C1:%.*]] = icmp slt i32 [[X1]], 42
-; AVX-NEXT:    [[C2:%.*]] = icmp slt i32 [[X2]], 42
-; AVX-NEXT:    call void @use1(i1 [[C2]])
-; AVX-NEXT:    [[C3:%.*]] = icmp slt i32 [[X3]], 42
-; AVX-NEXT:    [[D0:%.*]] = icmp sgt i32 [[X0]], 17
-; AVX-NEXT:    [[D1:%.*]] = icmp sgt i32 [[X1]], 17
-; AVX-NEXT:    [[D2:%.*]] = icmp sgt i32 [[X2]], 17
-; AVX-NEXT:    [[D3:%.*]] = icmp sgt i32 [[X3]], 17
-; AVX-NEXT:    [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
-; AVX-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
-; AVX-NEXT:    [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
-; AVX-NEXT:    [[S4:%.*]] = select i1 [[S3]], i1 [[D0]], i1 false
-; AVX-NEXT:    [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
-; AVX-NEXT:    [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
-; AVX-NEXT:    [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
-; AVX-NEXT:    ret i1 [[S7]]
+; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; CHECK-NEXT:    call void @use1(i1 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
+; CHECK-NEXT:    [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = freeze <4 x i1> [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP5]], i1 [[TMP7]], i1 false
+; CHECK-NEXT:    ret i1 [[OP_RDX]]
 ;
   %x0 = extractelement <4 x i32> %x, i32 0
   %x1 = extractelement <4 x i32> %x, i32 1
@@ -338,27 +297,20 @@ define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) {
 
 define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) {
 ; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_select(
-; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
-; CHECK-NEXT:    [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
-; CHECK-NEXT:    [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
-; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
-; CHECK-NEXT:    [[C0:%.*]] = icmp slt i32 [[X0]], 42
-; CHECK-NEXT:    [[C1:%.*]] = icmp slt i32 [[X1]], 42
-; CHECK-NEXT:    [[C2:%.*]] = icmp slt i32 [[X2]], 42
-; CHECK-NEXT:    [[C3:%.*]] = icmp slt i32 [[X3]], 42
-; CHECK-NEXT:    [[D0:%.*]] = icmp sgt i32 [[X0]], 17
-; CHECK-NEXT:    [[D1:%.*]] = icmp sgt i32 [[X1]], 17
-; CHECK-NEXT:    [[D2:%.*]] = icmp sgt i32 [[X2]], 17
-; CHECK-NEXT:    [[D3:%.*]] = icmp sgt i32 [[X3]], 17
-; CHECK-NEXT:    [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
-; CHECK-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; CHECK-NEXT:    [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; CHECK-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[TMP5]], i1 false
 ; CHECK-NEXT:    call void @use1(i1 [[S2]])
-; CHECK-NEXT:    [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
-; CHECK-NEXT:    [[S4:%.*]] = select i1 [[S3]], i1 [[D0]], i1 false
-; CHECK-NEXT:    [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
-; CHECK-NEXT:    [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
-; CHECK-NEXT:    [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
-; CHECK-NEXT:    ret i1 [[S7]]
+; CHECK-NEXT:    [[TMP6:%.*]] = freeze <4 x i1> [[TMP2]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP8]], i1 false
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i1 [[S2]], i1 false
+; CHECK-NEXT:    ret i1 [[OP_RDX1]]
 ;
   %x0 = extractelement <4 x i32> %x, i32 0
   %x1 = extractelement <4 x i32> %x, i32 1
@@ -393,11 +345,11 @@ define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; CHECK-NEXT:    [[Y1:%.*]] = extractelement <8 x i32> [[Y]], i32 1
 ; CHECK-NEXT:    [[Y2:%.*]] = extractelement <8 x i32> [[Y]], i32 2
 ; CHECK-NEXT:    [[Y3:%.*]] = extractelement <8 x i32> [[Y]], i32 3
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[X0]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[X1]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[X1]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[X0]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[X2]], i32 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[X3]], i32 3
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[Y0]], i32 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[Y1]], i32 5
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[Y2]], i32 6
@@ -435,46 +387,37 @@ define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
 
 define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
 ; SSE-LABEL: @logical_and_icmp_clamp_partial(
-; SSE-NEXT:    [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
-; SSE-NEXT:    [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
-; SSE-NEXT:    [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
-; SSE-NEXT:    [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
-; SSE-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X0]], i32 0
-; SSE-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X1]], i32 1
-; SSE-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[X2]], i32 2
-; SSE-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X0]], i32 3
-; SSE-NEXT:    [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 17>
-; SSE-NEXT:    [[TMP6:%.*]] = icmp sgt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 17>
-; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; SSE-NEXT:    [[D1:%.*]] = icmp sgt i32 [[X1]], 17
-; SSE-NEXT:    [[D2:%.*]] = icmp sgt i32 [[X2]], 17
-; SSE-NEXT:    [[D3:%.*]] = icmp sgt i32 [[X3]], 17
+; SSE-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
+; SSE-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[X]], i32 1
+; SSE-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[X]], i32 0
+; SSE-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
+; SSE-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP3]], i32 1
+; SSE-NEXT:    [[TMP6:%.*]] = icmp slt <2 x i32> [[TMP5]], <i32 42, i32 42>
+; SSE-NEXT:    [[C2:%.*]] = icmp slt i32 [[TMP1]], 42
+; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
 ; SSE-NEXT:    [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]]
 ; SSE-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP8]])
-; SSE-NEXT:    [[S5:%.*]] = select i1 [[TMP9]], i1 [[D1]], i1 false
-; SSE-NEXT:    [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
-; SSE-NEXT:    [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
-; SSE-NEXT:    ret i1 [[S7]]
+; SSE-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
+; SSE-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP9]], i1 [[TMP10]], i1 false
+; SSE-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1
+; SSE-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i1 [[TMP11]], i1 false
+; SSE-NEXT:    [[OP_RDX2:%.*]] = select i1 [[OP_RDX1]], i1 [[C2]], i1 false
+; SSE-NEXT:    ret i1 [[OP_RDX2]]
 ;
 ; AVX-LABEL: @logical_and_icmp_clamp_partial(
-; AVX-NEXT:    [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
-; AVX-NEXT:    [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
-; AVX-NEXT:    [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
-; AVX-NEXT:    [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
-; AVX-NEXT:    [[C0:%.*]] = icmp slt i32 [[X0]], 42
-; AVX-NEXT:    [[C1:%.*]] = icmp slt i32 [[X1]], 42
-; AVX-NEXT:    [[C2:%.*]] = icmp slt i32 [[X2]], 42
-; AVX-NEXT:    [[D0:%.*]] = icmp sgt i32 [[X0]], 17
-; AVX-NEXT:    [[D1:%.*]] = icmp sgt i32 [[X1]], 17
-; AVX-NEXT:    [[D2:%.*]] = icmp sgt i32 [[X2]], 17
-; AVX-NEXT:    [[D3:%.*]] = icmp sgt i32 [[X3]], 17
-; AVX-NEXT:    [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
-; AVX-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
-; AVX-NEXT:    [[S4:%.*]] = select i1 [[S2]], i1 [[D0]], i1 false
-; AVX-NEXT:    [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
-; AVX-NEXT:    [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
-; AVX-NEXT:    [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
-; AVX-NEXT:    ret i1 [[S7]]
+; AVX-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
+; AVX-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[X]], i32 1
+; AVX-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[X]], i32 0
+; AVX-NEXT:    [[C0:%.*]] = icmp slt i32 [[TMP3]], 42
+; AVX-NEXT:    [[C1:%.*]] = icmp slt i32 [[TMP2]], 42
+; AVX-NEXT:    [[C2:%.*]] = icmp slt i32 [[TMP1]], 42
+; AVX-NEXT:    [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
+; AVX-NEXT:    [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
+; AVX-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
+; AVX-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[C1]], i1 false
+; AVX-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i1 [[C0]], i1 false
+; AVX-NEXT:    [[OP_RDX2:%.*]] = select i1 [[OP_RDX1]], i1 [[C2]], i1 false
+; AVX-NEXT:    ret i1 [[OP_RDX2]]
 ;
   %x0 = extractelement <4 x i32> %x, i32 0
   %x1 = extractelement <4 x i32> %x, i32 1
@@ -499,44 +442,17 @@ define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
 }
 
 define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) {
-; SSE-LABEL: @logical_and_icmp_clamp_pred_diff(
-; SSE-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
-; SSE-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[X]], <i32 42, i32 42, i32 42, i32 42>
-; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; SSE-NEXT:    [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
-; SSE-NEXT:    [[TMP5:%.*]] = freeze <4 x i1> [[TMP3]]
-; SSE-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
-; SSE-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
-; SSE-NEXT:    [[S4:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false
-; SSE-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
-; SSE-NEXT:    [[S5:%.*]] = select i1 [[S4]], i1 [[TMP8]], i1 false
-; SSE-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
-; SSE-NEXT:    [[S6:%.*]] = select i1 [[S5]], i1 [[TMP9]], i1 false
-; SSE-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
-; SSE-NEXT:    [[S7:%.*]] = select i1 [[S6]], i1 [[TMP10]], i1 false
-; SSE-NEXT:    ret i1 [[S7]]
-;
-; AVX-LABEL: @logical_and_icmp_clamp_pred_diff(
-; AVX-NEXT:    [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
-; AVX-NEXT:    [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
-; AVX-NEXT:    [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
-; AVX-NEXT:    [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
-; AVX-NEXT:    [[C0:%.*]] = icmp slt i32 [[X0]], 42
-; AVX-NEXT:    [[C1:%.*]] = icmp slt i32 [[X1]], 42
-; AVX-NEXT:    [[C2:%.*]] = icmp slt i32 [[X2]], 42
-; AVX-NEXT:    [[C3:%.*]] = icmp ult i32 [[X3]], 42
-; AVX-NEXT:    [[D0:%.*]] = icmp sgt i32 [[X0]], 17
-; AVX-NEXT:    [[D1:%.*]] = icmp sgt i32 [[X1]], 17
-; AVX-NEXT:    [[D2:%.*]] = icmp sgt i32 [[X2]], 17
-; AVX-NEXT:    [[D3:%.*]] = icmp sgt i32 [[X3]], 17
-; AVX-NEXT:    [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
-; AVX-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
-; AVX-NEXT:    [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
-; AVX-NEXT:    [[S4:%.*]] = select i1 [[S3]], i1 [[D0]], i1 false
-; AVX-NEXT:    [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
-; AVX-NEXT:    [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
-; AVX-NEXT:    [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
-; AVX-NEXT:    ret i1 [[S7]]
+; CHECK-LABEL: @logical_and_icmp_clamp_pred_diff(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[X]], <i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
+; CHECK-NEXT:    [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = freeze <4 x i1> [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP7]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP8]], i1 false
+; CHECK-NEXT:    ret i1 [[OP_RDX]]
 ;
   %x0 = extractelement <4 x i32> %x, i32 0
   %x1 = extractelement <4 x i32> %x, i32 1
@@ -566,8 +482,8 @@ define i1 @logical_and_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
 ; CHECK-NEXT:    [[S3:%.*]] = select i1 [[C:%.*]], i1 [[C]], i1 false
 ; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i1 [[S3]], i1 false
-; CHECK-NEXT:    ret i1 [[OP_EXTRA]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[S3]], i1 false
+; CHECK-NEXT:    ret i1 [[OP_RDX]]
 ;
   %x0 = extractelement <4 x i32> %x, i32 0
   %x1 = extractelement <4 x i32> %x, i32 1
@@ -595,8 +511,8 @@ define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
 ; CHECK-NEXT:    [[S3:%.*]] = select i1 [[C:%.*]], i1 true, i1 [[C]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i1 true, i1 [[S3]]
-; CHECK-NEXT:    ret i1 [[OP_EXTRA]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 true, i1 [[S3]]
+; CHECK-NEXT:    ret i1 [[OP_RDX]]
 ;
   %x0 = extractelement <4 x i32> %x, i32 0
   %x1 = extractelement <4 x i32> %x, i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll
index 6bee995..1fd2f30 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll
@@ -8,20 +8,21 @@ define i64 @test() {
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ 0, [[BB2:%.*]] ], [ 0, [[BB1:%.*]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ 0, [[BB2]] ], [ 0, [[BB1]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP4]]
-; CHECK-NEXT:    [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP4]]
-; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP4]]
-; CHECK-NEXT:    [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP4]]
-; CHECK-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP4]]
-; CHECK-NEXT:    [[TMP13:%.*]] = mul i32 [[TMP12]], [[TMP4]]
-; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP4]]
-; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP4]]
-; CHECK-NEXT:    [[TMP65:%.*]] = sext i32 [[TMP15]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ poison, [[BB2:%.*]] ], [ zeroinitializer, [[BB1:%.*]] ]
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP1]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP1]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP1]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP1]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP1]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP1]], i32 7
+; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP9]])
+; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = mul i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[TMP65:%.*]] = sext i32 [[OP_RDX]] to i64
 ; CHECK-NEXT:    ret i64 [[TMP65]]
 ;
 bb1:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
index 362967f..0fac440 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
@@ -24,15 +24,15 @@ define i32 @test(i32* nocapture readonly %p) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
-; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]]
+; CHECK-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], [[SUM]]
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
+; CHECK-NEXT:    ret i32 [[OP_RDX]]
 ;
 entry:
   %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
@@ -97,17 +97,17 @@ define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[Q:%.*]] to <8 x i32>*
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]])
-; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
+; CHECK-NEXT:    [[OP_RDX]] = add i32 [[TMP5]], [[SUM]]
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
+; CHECK-NEXT:    ret i32 [[OP_RDX]]
 ;
 entry:
   %arrayidx.p.1 = getelementptr inbounds i32, i32* %p, i64 1
@@ -188,18 +188,18 @@ define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[Q:%.*]] to <8 x i32>*
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[SHUFFLE]], [[TMP3]]
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]])
-; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
+; CHECK-NEXT:    [[OP_RDX]] = add i32 [[TMP5]], [[SUM]]
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
+; CHECK-NEXT:    ret i32 [[OP_RDX]]
 ;
 entry:
   %arrayidx.p.1 = getelementptr inbounds i32, i32* %p, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
index 28417c0..7e0dc28 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
@@ -11,9 +11,9 @@
 ; }
 
 ; Vector cost is 5, Scalar cost is 7
-; AVX: Adding cost -2 for reduction that starts with   %7 = load i32, i32* %arrayidx.7, align 4 (It is a splitting reduction)
+; AVX: Adding cost -2 for reduction that starts with   %0 = load i32, i32* %p, align 4 (It is a splitting reduction)
 ; Vector cost is 6, Scalar cost is 7
-; SSE: Adding cost -1 for reduction that starts with   %7 = load i32, i32* %arrayidx.7, align 4 (It is a splitting reduction)
+; SSE: Adding cost -1 for reduction that starts with   %0 = load i32, i32* %p, align 4 (It is a splitting reduction)
 define i32 @test_add(i32* nocapture readonly %p) {
 ; CHECK-LABEL: @test_add(
 ; CHECK-NEXT:  entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
index 0e5d3bc..915fe01 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
@@ -11,31 +11,26 @@ define void @hoge() {
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[T:%.*]] = select i1 undef, i16 undef, i16 15
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i16> <i16 poison, i16 undef>, i16 [[T]], i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i16> <i16 undef, i16 poison>, i16 [[T]], i32 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32>
-; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <2 x i32> <i32 undef, i32 63>, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <2 x i32> <i32 63, i32 undef>, [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], undef
-; CHECK-NEXT:    [[SHUFFLE10:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
-; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[SHUFFLE10]], <i32 undef, i32 15, i32 31, i32 47>
+; CHECK-NEXT:    [[SHUFFLE4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[SHUFFLE4]], <i32 15, i32 undef, i32 31, i32 47>
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT:    [[T19:%.*]] = select i1 undef, i32 [[TMP5]], i32 undef
 ; CHECK-NEXT:    [[T20:%.*]] = icmp sgt i32 [[T19]], 63
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <2 x i32> undef, [[TMP1]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub <2 x i32> [[TMP6]], undef
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 -49, i32 -33, i32 -33, i32 -17>
-; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP9]], undef
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP9]], i32 undef
-; CHECK-NEXT:    [[OP_EXTRA2:%.*]] = icmp slt i32 [[OP_EXTRA1]], undef
-; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = select i1 [[OP_EXTRA2]], i32 [[OP_EXTRA1]], i32 undef
-; CHECK-NEXT:    [[OP_EXTRA4:%.*]] = icmp slt i32 [[OP_EXTRA3]], undef
-; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = select i1 [[OP_EXTRA4]], i32 [[OP_EXTRA3]], i32 undef
-; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = icmp slt i32 [[OP_EXTRA5]], undef
-; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = select i1 [[OP_EXTRA6]], i32 [[OP_EXTRA5]], i32 undef
-; CHECK-NEXT:    [[OP_EXTRA8:%.*]] = icmp slt i32 [[OP_EXTRA7]], undef
-; CHECK-NEXT:    [[OP_EXTRA9:%.*]] = select i1 [[OP_EXTRA8]], i32 [[OP_EXTRA7]], i32 undef
-; CHECK-NEXT:    [[T45:%.*]] = icmp sgt i32 undef, [[OP_EXTRA9]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 -17, i32 -33, i32 -33, i32 -49>
+; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
+; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = icmp slt i32 [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP10]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = icmp slt i32 [[OP_RDX1]], undef
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[OP_RDX1]], i32 undef
+; CHECK-NEXT:    [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX3]]
 ; CHECK-NEXT:    unreachable
 ;
 bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll
index a9a6aa9..e46169a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll
@@ -21,19 +21,19 @@ define void @test() {
 ; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 3
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[I]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 8
-; CHECK-NEXT:    [[I5:%.*]] = add i32 undef, undef
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
-; CHECK-NEXT:    [[OP_EXTRA2:%.*]] = add i32 [[TMP2]], [[I5]]
-; CHECK-NEXT:    [[I10:%.*]] = add i32 [[OP_EXTRA2]], undef
-; CHECK-NEXT:    [[I11:%.*]] = add i32 [[OP_EXTRA2]], [[I10]]
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = add i32 [[TMP2]], undef
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = add i32 [[OP_RDX3]], undef
+; CHECK-NEXT:    [[I10:%.*]] = add i32 [[OP_RDX4]], undef
+; CHECK-NEXT:    [[I11:%.*]] = add i32 [[OP_RDX4]], [[I10]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[I1]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[I12:%.*]] = add i32 undef, undef
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP5]], [[I12]]
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = add i32 [[OP_EXTRA]], undef
-; CHECK-NEXT:    [[I18:%.*]] = add i32 [[OP_EXTRA1]], [[I11]]
-; CHECK-NEXT:    [[I19:%.*]] = add i32 [[OP_EXTRA1]], [[I18]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 [[TMP5]], undef
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], undef
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = add i32 [[OP_RDX1]], undef
+; CHECK-NEXT:    [[I18:%.*]] = add i32 [[OP_RDX2]], [[I11]]
+; CHECK-NEXT:    [[I19:%.*]] = add i32 [[OP_RDX2]], [[I18]]
 ; CHECK-NEXT:    [[I20:%.*]] = add i32 undef, [[I19]]
 ; CHECK-NEXT:    [[I21:%.*]] = add i32 undef, [[I20]]
 ; CHECK-NEXT:    [[I22:%.*]] = add i32 undef, [[I21]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
index 9502620..654d688 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
@@ -7,8 +7,8 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <2 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A1:%.*]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4
@@ -53,9 +53,9 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 1, i32 1, i32 0, i32 2, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A1:%.*]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 2, i32 1, i32 3, i32 1, i32 1, i32 0, i32 2, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4
@@ -104,9 +104,9 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 3, i32 2, i32 3, i32 0, i32 1, i32 0, i32 2, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A1:%.*]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 2, i32 3, i32 3, i32 0, i32 1, i32 0, i32 2, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4
diff --git a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll
index f48d5e2..b2ae15c 100644
--- a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll
+++ b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll
@@ -43,7 +43,7 @@ declare i32 @llvm.umin.i32(i32, i32)
 define void @test2() {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
+; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>)
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <4 x i32> poison, [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 77)
diff --git a/llvm/test/Transforms/Scalarizer/intrinsics.ll b/llvm/test/Transforms/Scalarizer/intrinsics.ll
index 896000d..4e5a212 100644
--- a/llvm/test/Transforms/Scalarizer/intrinsics.ll
+++ b/llvm/test/Transforms/Scalarizer/intrinsics.ll
@@ -24,6 +24,9 @@ declare <2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32)
 declare <2 x i32> @llvm.smul.fix.sat.v2i32(<2 x i32>, <2 x i32>, i32)
 declare <2 x i32> @llvm.umul.fix.sat.v2i32(<2 x i32>, <2 x i32>, i32)
 
+declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float>)
+declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float>)
+
 
 ; CHECK-LABEL: @scalarize_sqrt_v2f32(
 ; CHECK: %sqrt.i0 = call float @llvm.sqrt.f32(float %x.i0)
@@ -134,3 +137,25 @@ define <2 x i32> @scalarize_umul_fix_sat_v2i32(<2 x i32> %x) #0 {
   %umulfixsat = call <2 x i32> @llvm.umul.fix.sat.v2i32(<2 x i32> %x, <2 x i32> <i32 5, i32 19>, i32 31)
   ret <2 x i32> %umulfixsat
 }
+
+; CHECK-LABEL: @scalarize_fptosi_sat(
+; CHECK: %sat.i0 = call i32 @llvm.fptosi.sat.i32.f32(float %x.i0)
+; CHECK: %sat.i1 = call i32 @llvm.fptosi.sat.i32.f32(float %x.i1)
+; CHECK: %sat.upto0 = insertelement <2 x i32> poison, i32 %sat.i0, i32 0
+; CHECK: %sat = insertelement <2 x i32> %sat.upto0, i32 %sat.i1, i32 1
+; CHECK: ret <2 x i32> %sat
+define <2 x i32> @scalarize_fptosi_sat(<2 x float> %x) #0 {
+  %sat = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %x)
+  ret <2 x i32> %sat
+}
+
+; CHECK-LABEL: @scalarize_fptoui_sat(
+; CHECK: %sat.i0 = call i32 @llvm.fptoui.sat.i32.f32(float %x.i0)
+; CHECK: %sat.i1 = call i32 @llvm.fptoui.sat.i32.f32(float %x.i1)
+; CHECK: %sat.upto0 = insertelement <2 x i32> poison, i32 %sat.i0, i32 0
+; CHECK: %sat = insertelement <2 x i32> %sat.upto0, i32 %sat.i1, i32 1
+; CHECK: ret <2 x i32> %sat
+define <2 x i32> @scalarize_fptoui_sat(<2 x float> %x) #0 {
+  %sat = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %x)
+  ret <2 x i32> %sat
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
index b135e28..831dd5f 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
@@ -6,13 +6,15 @@ declare void @some_func()
 define i32 @need_freeze_of_individual_or_conditions1(i1 %cond1, i1 %cond2, i1 %cond3, i1 %cond4) {
 ; CHECK-LABEL: @need_freeze_of_individual_or_conditions1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = and i1 [[COND4:%.*]], [[COND1:%.*]]
-; CHECK-NEXT:    [[DOTFR:%.*]] = freeze i1 [[TMP0]]
-; CHECK-NEXT:    br i1 [[DOTFR]], label [[ENTRY_SPLIT:%.*]], label [[EXIT_SPLIT:%.*]]
+; CHECK-NEXT:    [[COND4_FR:%.*]] = freeze i1 [[COND4:%.*]]
+; CHECK-NEXT:    [[COND1_FR:%.*]] = freeze i1 [[COND1:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = and i1 [[COND4_FR]], [[COND1_FR]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[EXIT_SPLIT:%.*]]
 ; CHECK:       entry.split:
-; CHECK-NEXT:    [[TMP1:%.*]] = and i1 [[COND2:%.*]], [[COND3:%.*]]
-; CHECK-NEXT:    [[DOTFR2:%.*]] = freeze i1 [[TMP1]]
-; CHECK-NEXT:    br i1 [[DOTFR2]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[EXIT_SPLIT1:%.*]]
+; CHECK-NEXT:    [[COND2_FR:%.*]] = freeze i1 [[COND2:%.*]]
+; CHECK-NEXT:    [[COND3_FR:%.*]] = freeze i1 [[COND3:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i1 [[COND2_FR]], [[COND3_FR]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[EXIT_SPLIT1:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; CHECK:       loop.header:
@@ -50,13 +52,14 @@ exit:
 define i32 @need_freeze_of_individual_or_conditions2(i1 noundef %cond1, i1 %cond2, i1 %cond3, i1 %cond4) {
 ; CHECK-LABEL: @need_freeze_of_individual_or_conditions2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = and i1 [[COND4:%.*]], [[COND1:%.*]]
-; CHECK-NEXT:    [[DOTFR:%.*]] = freeze i1 [[TMP0]]
-; CHECK-NEXT:    br i1 [[DOTFR]], label [[ENTRY_SPLIT:%.*]], label [[EXIT_SPLIT:%.*]]
+; CHECK-NEXT:    [[COND4_FR:%.*]] = freeze i1 [[COND4:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = and i1 [[COND4_FR]], [[COND1:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[EXIT_SPLIT:%.*]]
 ; CHECK:       entry.split:
-; CHECK-NEXT:    [[TMP1:%.*]] = and i1 [[COND2:%.*]], [[COND3:%.*]]
-; CHECK-NEXT:    [[DOTFR2:%.*]] = freeze i1 [[TMP1]]
-; CHECK-NEXT:    br i1 [[DOTFR2]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[EXIT_SPLIT1:%.*]]
+; CHECK-NEXT:    [[COND2_FR:%.*]] = freeze i1 [[COND2:%.*]]
+; CHECK-NEXT:    [[COND3_FR:%.*]] = freeze i1 [[COND3:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i1 [[COND2_FR]], [[COND3_FR]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[EXIT_SPLIT1:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; CHECK:       loop.header:
@@ -94,13 +97,14 @@ exit:
 define i32 @need_freeze_of_individual_or_conditions3(i1 %cond1, i1 %cond2, i1 %cond3, i1 noundef %cond4) {
 ; CHECK-LABEL: @need_freeze_of_individual_or_conditions3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = and i1 [[COND4:%.*]], [[COND1:%.*]]
-; CHECK-NEXT:    [[DOTFR:%.*]] = freeze i1 [[TMP0]]
-; CHECK-NEXT:    br i1 [[DOTFR]], label [[ENTRY_SPLIT:%.*]], label [[EXIT_SPLIT:%.*]]
+; CHECK-NEXT:    [[COND1_FR:%.*]] = freeze i1 [[COND1:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = and i1 [[COND4:%.*]], [[COND1_FR]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[EXIT_SPLIT:%.*]]
 ; CHECK:       entry.split:
-; CHECK-NEXT:    [[TMP1:%.*]] = and i1 [[COND2:%.*]], [[COND3:%.*]]
-; CHECK-NEXT:    [[DOTFR2:%.*]] = freeze i1 [[TMP1]]
-; CHECK-NEXT:    br i1 [[DOTFR2]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[EXIT_SPLIT1:%.*]]
+; CHECK-NEXT:    [[COND2_FR:%.*]] = freeze i1 [[COND2:%.*]]
+; CHECK-NEXT:    [[COND3_FR:%.*]] = freeze i1 [[COND3:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i1 [[COND2_FR]], [[COND3_FR]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[EXIT_SPLIT1:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; CHECK:       loop.header:
@@ -138,9 +142,10 @@ exit:
 define i32 @need_freeze_of_individual_and_conditions1(i1 %cond1, i1 %cond4) {
 ; CHECK-LABEL: @need_freeze_of_individual_and_conditions1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = or i1 [[COND4:%.*]], [[COND1:%.*]]
-; CHECK-NEXT:    [[DOTFR:%.*]] = freeze i1 [[TMP0]]
-; CHECK-NEXT:    br i1 [[DOTFR]], label [[EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:    [[COND4_FR:%.*]] = freeze i1 [[COND4:%.*]]
+; CHECK-NEXT:    [[COND1_FR:%.*]] = freeze i1 [[COND1:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = or i1 [[COND4_FR]], [[COND1_FR]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; CHECK:       loop.header:
@@ -172,9 +177,9 @@ exit:
 define i32 @need_freeze_of_individual_and_conditions2(i1 noundef %cond1, i1 %cond4) {
 ; CHECK-LABEL: @need_freeze_of_individual_and_conditions2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = or i1 [[COND4:%.*]], [[COND1:%.*]]
-; CHECK-NEXT:    [[DOTFR:%.*]] = freeze i1 [[TMP0]]
-; CHECK-NEXT:    br i1 [[DOTFR]], label [[EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:    [[COND4_FR:%.*]] = freeze i1 [[COND4:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = or i1 [[COND4_FR]], [[COND1:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; CHECK:       loop.header:
@@ -206,9 +211,9 @@ exit:
 define i32 @need_freeze_of_individual_and_conditions3(i1 %cond1, i1 noundef %cond4) {
 ; CHECK-LABEL: @need_freeze_of_individual_and_conditions3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = or i1 [[COND4:%.*]], [[COND1:%.*]]
-; CHECK-NEXT:    [[DOTFR:%.*]] = freeze i1 [[TMP0]]
-; CHECK-NEXT:    br i1 [[DOTFR]], label [[EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:    [[COND1_FR:%.*]] = freeze i1 [[COND1:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = or i1 [[COND4:%.*]], [[COND1_FR]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; CHECK:       loop.header:
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
index f8fdc1a..3208135 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
@@ -152,11 +152,14 @@ exit:
 define i32 @test_partial_condition_unswitch_or_select(i32* %var, i1 %cond1, i1 %cond2, i1 %cond3, i1 %cond4, i1 %cond5, i1 %cond6) {
 ; CHECK-LABEL: @test_partial_condition_unswitch_or_select(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = or i1 [[COND4:%.*]], [[COND2:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = or i1 [[TMP0]], [[COND3:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP1]], [[COND1:%.*]]
-; CHECK-NEXT:    [[DOTFR:%.*]] = freeze i1 [[TMP2]]
-; CHECK-NEXT:    br i1 [[DOTFR]], label [[LOOP_EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:    [[COND4_FR:%.*]] = freeze i1 [[COND4:%.*]]
+; CHECK-NEXT:    [[COND2_FR:%.*]] = freeze i1 [[COND2:%.*]]
+; CHECK-NEXT:    [[COND3_FR:%.*]] = freeze i1 [[COND3:%.*]]
+; CHECK-NEXT:    [[COND1_FR:%.*]] = freeze i1 [[COND1:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = or i1 [[COND4_FR]], [[COND2_FR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i1 [[TMP0]], [[COND3_FR]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP1]], [[COND1_FR]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[LOOP_EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    [[COND6_FR:%.*]] = freeze i1 [[COND6:%.*]]
 ; CHECK-NEXT:    br i1 [[COND6_FR]], label [[LOOP_EXIT_SPLIT1:%.*]], label [[ENTRY_SPLIT_SPLIT:%.*]]
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
index c696238..4bb5fc2 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
@@ -591,11 +591,14 @@ define i32 @test_partial_condition_unswitch_or(i32* %var, i1 %cond1, i1 %cond2,
 entry:
   br label %loop_begin
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    %[[INV_OR1:.*]] = or i1 %cond4, %cond2
-; CHECK-NEXT:    %[[INV_OR2:.*]] = or i1 %[[INV_OR1]], %cond3
-; CHECK-NEXT:    %[[INV_OR3:.*]] = or i1 %[[INV_OR2]], %cond1
-; CHECK-NEXT:    [[FROZEN:%.+]] = freeze i1 %[[INV_OR3]]
-; CHECK-NEXT:    br i1 [[FROZEN]], label %loop_exit.split, label %entry.split
+; CHECK-NEXT:    %[[C4_FR:.+]] = freeze i1 %cond4
+; CHECK-NEXT:    %[[C2_FR:.+]] = freeze i1 %cond2
+; CHECK-NEXT:    %[[C3_FR:.+]] = freeze i1 %cond3
+; CHECK-NEXT:    %[[C1_FR:.+]] = freeze i1 %cond1
+; CHECK-NEXT:    %[[INV_OR1:.*]] = or i1 %[[C4_FR]], %[[C2_FR]]
+; CHECK-NEXT:    %[[INV_OR2:.*]] = or i1 %[[INV_OR1]], %[[C3_FR]]
+; CHECK-NEXT:    %[[INV_OR3:.*]] = or i1 %[[INV_OR2]], %[[C1_FR]]
+; CHECK-NEXT:    br i1 %[[INV_OR3]], label %loop_exit.split, label %entry.split
 ;
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label %loop_begin
diff --git a/llvm/test/tools/llvm-ifs/exclude.test b/llvm/test/tools/llvm-ifs/exclude.test
index 29f9ab8..7878948 100644
--- a/llvm/test/tools/llvm-ifs/exclude.test
+++ b/llvm/test/tools/llvm-ifs/exclude.test
@@ -2,6 +2,9 @@
 
 # RUN: llvm-ifs --input-format=IFS --output-ifs=- --exclude='exclude*' %s | FileCheck %s
 
+# Check that exclude excludes from elf files too.
+# RUN: llvm-ifs %s --output-elf - --exclude='exclude*' | llvm-ifs - --output-ifs - | FileCheck %s
+
 # RUN: llvm-ifs --input-format=IFS --output-ifs=- --exclude='exclude*' \
 # RUN: --strip-undefined %s | FileCheck %s --check-prefix=BOTH
 
@@ -13,6 +16,7 @@
 --- !ifs-v1
 SoName: somelib.so
 IfsVersion: 3.0
+Target: x86_64-unknown-linux-gnu
 Symbols:
   - { Name: dont_exclude, Type: Func, Undefined: true }
   - { Name: exclude_1, Type: Func }
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
index 5c0fc8e..0662794 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
@@ -1269,7 +1269,7 @@ vzeroupper
 # CHECK-NEXT:  1      2     1.00                        vmovd	%xmm0, %ecx
 # CHECK-NEXT:  2      1     1.00           *            vmovd	%xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        vmovddup	%xmm0, %xmm2
-# CHECK-NEXT:  1      5     0.50    *                   vmovddup	(%rax), %xmm2
+# CHECK-NEXT:  1      6     0.50    *                   vmovddup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        vmovddup	%ymm0, %ymm2
 # CHECK-NEXT:  1      7     0.50    *                   vmovddup	(%rax), %ymm2
 # CHECK-NEXT:  1      1     0.33                        vmovdqa	%xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse3.s
index df495ed..4c5908e 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse3.s
@@ -58,7 +58,7 @@ mwait
 # CHECK-NEXT:  1      6     0.50    *                   lddqu	(%rax), %xmm2
 # CHECK-NEXT:  1      100   0.25                  U     monitor
 # CHECK-NEXT:  1      1     1.00                        movddup	%xmm0, %xmm2
-# CHECK-NEXT:  1      5     0.50    *                   movddup	(%rax), %xmm2
+# CHECK-NEXT:  1      6     0.50    *                   movddup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        movshdup	%xmm0, %xmm2
 # CHECK-NEXT:  1      6     0.50    *                   movshdup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        movsldup	%xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
index 005c2ee..38170f8 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
@@ -1269,7 +1269,7 @@ vzeroupper
 # CHECK-NEXT:  1      2     1.00                        vmovd	%xmm0, %ecx
 # CHECK-NEXT:  2      1     1.00           *            vmovd	%xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        vmovddup	%xmm0, %xmm2
-# CHECK-NEXT:  1      5     0.50    *                   vmovddup	(%rax), %xmm2
+# CHECK-NEXT:  1      6     0.50    *                   vmovddup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        vmovddup	%ymm0, %ymm2
 # CHECK-NEXT:  1      7     0.50    *                   vmovddup	(%rax), %ymm2
 # CHECK-NEXT:  1      1     0.33                        vmovdqa	%xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s
index 61cf0f1..e6bec19 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s
@@ -58,7 +58,7 @@ mwait
 # CHECK-NEXT:  1      6     0.50    *                   lddqu	(%rax), %xmm2
 # CHECK-NEXT:  1      100   0.25                  U     monitor
 # CHECK-NEXT:  1      1     1.00                        movddup	%xmm0, %xmm2
-# CHECK-NEXT:  1      5     0.50    *                   movddup	(%rax), %xmm2
+# CHECK-NEXT:  1      6     0.50    *                   movddup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        movshdup	%xmm0, %xmm2
 # CHECK-NEXT:  1      6     0.50    *                   movshdup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        movsldup	%xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-reduce/mir/drop-ir-references.mir b/llvm/test/tools/llvm-reduce/mir/drop-ir-references.mir
new file mode 100644
index 0000000..88d9566
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/mir/drop-ir-references.mir
@@ -0,0 +1,86 @@
+# REQUIRES: amdgpu-registered-target
+# RUN: llvm-reduce -simplify-mir -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log
+# RUN: FileCheck --check-prefix=RESULT %s < %t
+
+# CHECK-INTERESTINGNESS: G_LOAD
+# CHECK-INTERESTINGNESS: G_LOAD
+# CHECK-INTERESTINGNESS: G_LOAD
+# CHECK-INTERESTINGNESS: G_LOAD
+# CHECK-INTERESTINGNESS: G_STORE
+# CHECK-INTERESTINGNESS: G_STORE
+# CHECK-INTERESTINGNESS: G_STORE %{{[0-9]+}}(s32), %{{[0-9]+}}(p5) :: (store (s32) into %ir.keep.store, addrspace 5)
+
+# RESULT: name: func
+# RESULT: stack:
+# RESULT-NEXT: - { id: 0, size: 32, alignment: 8 }
+
+# RESULT: body:
+# RESULT-NEXT: bb.0:
+# RESULT: %{{[0-9]+}}:_(<2 x s16>) = G_LOAD %{{[0-9]+}}(p1) :: (load (<2 x s16>), align 32, addrspace 1)
+
+# RESULT: bb.1:
+# RESULT-NEXT: %{{[0-9]+}}:_(<2 x s32>) = G_LOAD %{{[0-9]+}}(p1) :: (load (<2 x s32>), addrspace 3)
+# RESULT-NEXT: %{{[0-9]+}}:_(<2 x s32>) = G_LOAD %{{[0-9]+}}(p1) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
+# RESULT-NEXT: %{{[0-9]+}}:_(<2 x s32>) = G_LOAD %{{[0-9]+}}(p1) :: (load (<2 x s32>) from unknown-address + 12, align 4, basealign 8, addrspace 3)
+
+
+# RESULT: bb.2:
+# RESULT-NEXT: G_STORE %{{[0-9]+}}(<2 x s32>), %{{[0-9]+}}(p5) :: (store (<2 x s32>) into %fixed-stack.0, addrspace 5)
+# RESULT-NEXT: G_STORE %{{[0-9]+}}(<2 x s32>), %{{[0-9]+}}(p5) :: (store (<2 x s32>) into %stack.0, addrspace 5)
+
+# RESULT: bb.3:
+# RESULT-NEXT: G_STORE %{{[0-9]+}}(s32), %{{[0-9]+}}(p5) :: (store (s32) into %ir.keep.store, addrspace 5)
+# RESULT-NEXT: S_ENDPGM
+
+--- |
+  define void @func(<2 x i16> addrspace(1)* %argptr0, <2 x i32> addrspace(3)* %argptr1, i32 addrspace(5)* %keep.store) {
+  entry:
+    %alloca = alloca i32, addrspace(5)
+    br label %block.name.0
+
+  block.name.0:
+    br label %block.name.1
+
+  block.name.1:
+    br label %exit
+
+  exit:
+    ret void
+  }
+
+...
+---
+name: func
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, offset: 16, size: 8, alignment: 8 }
+stack:
+  - { id: 0, size: 32, alignment: 8, name: alloca }
+body:             |
+  bb.0.entry:
+    S_WAITCNT 0
+    S_NOP 0
+    %0:_(p1) = G_IMPLICIT_DEF
+    %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>) from %ir.argptr0, align 32, addrspace 1)
+    %2:_(<2 x s32>) = G_ZEXT %1
+
+  bb.1.block.name.0:
+    %3:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>) from %ir.argptr1, addrspace 3)
+    %4:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>) from %ir.argptr1 + 8, addrspace 3)
+    %5:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>) from %ir.argptr1 + 12, addrspace 3)
+
+  bb.2.block.name.0:
+    %6:_(<2 x s32>) = G_ADD %2, %3
+    %7:_(<2 x s32>) = G_ADD %6, %4
+    %8:_(<2 x s32>) = G_ADD %7, %5
+    %9:_(p5) = G_IMPLICIT_DEF
+    G_STORE %8, %9 :: (store (<2 x s32>) into %fixed-stack.0, addrspace 5)
+    G_STORE %8, %9 :: (store (<2 x s32>) into %stack.0.alloca, addrspace 5)
+
+  bb.3.exit:
+    %10:_(p5) = G_IMPLICIT_DEF
+    %11:_(s32) = G_IMPLICIT_DEF
+    G_STORE %11, %10 :: (store (s32) into %ir.keep.store, addrspace 5)
+    S_ENDPGM 0
+...
+
diff --git a/llvm/test/tools/llvm-reduce/mir/generic-vreg.mir b/llvm/test/tools/llvm-reduce/mir/generic-vreg.mir
index edb2cf9..b1099a2 100644
--- a/llvm/test/tools/llvm-reduce/mir/generic-vreg.mir
+++ b/llvm/test/tools/llvm-reduce/mir/generic-vreg.mir
@@ -12,13 +12,17 @@
 # CHECK-INTERESTINGNESS: G_IMPLICIT_DEF
 # CHECK-INTERESTINGNESS: G_STORE
 
-# RESULT: %v0:vgpr(s32) = COPY $vgpr0, implicit-def %9(<2 x s16>), implicit-def %10(s64), implicit-def %11(s64), implicit-def %12(<2 x s32>)
-# RESULT-NEXT: %unused_load_ptr:sgpr(p1) = G_IMPLICIT_DEF
-# RESULT-NEXT: %aoeu:_(s64) = G_BITCAST %12(<2 x s32>)
+# RESULT: %{{[0-9]+}}:vgpr(s32) = G_IMPLICIT_DEF
+# RESULT-NEXT: %{{[0-9]+}}:vgpr(<2 x s16>) = G_IMPLICIT_DEF
+# RESULT-NEXT: %{{[0-9]+}}:sgpr(p1) = G_IMPLICIT_DEF
+# RESULT-NEXT: %{{[0-9]+}}:_(s64) = G_IMPLICIT_DEF
+# RESULT-NEXT: %{{[0-9]+}}:vreg_64(s64) = IMPLICIT_DEF
+# RESULT-NEXT: %{{[0-9]+}}:_(<2 x s32>) = G_IMPLICIT_DEF
+# RESULT-NEXT: %aoeu:_(s64) = G_BITCAST %14(<2 x s32>)
 # RESULT-NEXT: %add:_(s64) = G_ADD %aoeu, %aoeu
 # RESULT-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF
-# RESULT-NEXT: G_STORE %v0(s32), %ptr(p1) :: (store (s32), addrspace 1)
-# RESULT-NEXT: S_ENDPGM 0, implicit %add(s64), implicit %9(<2 x s16>), implicit %11(s64)
+# RESULT-NEXT: G_STORE %{{[0-9]+}}(s32), %ptr(p1) :: (store (s32), addrspace 1)
+# RESULT-NEXT: S_ENDPGM 0, implicit %add(s64), implicit %{{[0-9]+}}(<2 x s16>), implicit %{{[0-9]+}}(s64)
 
 ---
 name:            f
diff --git a/llvm/test/tools/llvm-reduce/mir/instr-reduce.mir b/llvm/test/tools/llvm-reduce/mir/instr-reduce.mir
index f252ff3..e6e03b4 100644
--- a/llvm/test/tools/llvm-reduce/mir/instr-reduce.mir
+++ b/llvm/test/tools/llvm-reduce/mir/instr-reduce.mir
@@ -8,8 +8,8 @@
 # pattern in the output and that combined with that the MIR has to be valid
 # (pass verify) results in the given sequence.
 
-# CHECK:      %0:gpr = COPY $x10
-# CHECK-NEXT: %2:gpr = ADDI %0, 5
+# CHECK:      [[IMPDEF:%[0-9]+]]:gpr = IMPLICIT_DEF
+# CHECK-NEXT: %{{[0-9]+}}:gpr = ADDI [[IMPDEF]], 5
 # CHECK-NEXT: PseudoRET implicit $x10
 
 ...
diff --git a/llvm/test/tools/llvm-reduce/mir/preserve-block-info.mir b/llvm/test/tools/llvm-reduce/mir/preserve-block-info.mir
index f20ebd5..098adb75 100644
--- a/llvm/test/tools/llvm-reduce/mir/preserve-block-info.mir
+++ b/llvm/test/tools/llvm-reduce/mir/preserve-block-info.mir
@@ -1,5 +1,5 @@
 # REQUIRES: amdgpu-registered-target
-# RUN: llvm-reduce -simplify-mir -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log
+# RUN: llvm-reduce -simplify-mir --delta-passes=instructions -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log
 # RUN: FileCheck --match-full-lines --check-prefix=RESULT %s < %t
 
 # CHECK-INTERESTINGNESS: V_MOV_B32
diff --git a/llvm/test/tools/llvm-reduce/mir/preserve-frame-info.mir b/llvm/test/tools/llvm-reduce/mir/preserve-frame-info.mir
index 8046e29..1a89194 100644
--- a/llvm/test/tools/llvm-reduce/mir/preserve-frame-info.mir
+++ b/llvm/test/tools/llvm-reduce/mir/preserve-frame-info.mir
@@ -1,5 +1,5 @@
 # REQUIRES: amdgpu-registered-target
-# RUN: llvm-reduce -simplify-mir -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log
+# RUN: llvm-reduce -simplify-mir -mtriple=amdgcn-amd-amdhsa --delta-passes=instructions --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log
 # RUN: FileCheck --match-full-lines --check-prefix=RESULT %s < %t
 
 # CHECK-INTERESTINGNESS-COUNT-15: V_MOV_B32
@@ -46,7 +46,7 @@
 # RESULT-NEXT:  - { id: 9, name: guard, offset: 128, size: 4, alignment: 4 }
 
 
-# RESULT: S_NOP 0
+# RESULT: bb.0:
 # RESULT-NEXT: [[FI0:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
 # RESULT-NEXT: [[FI1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0.bigalloca, implicit $exec
 # RESULT-NEXT: [[FI2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.2, implicit $exec
diff --git a/llvm/test/tools/llvm-reduce/mir/remove-frame-destroy.mir b/llvm/test/tools/llvm-reduce/mir/remove-frame-destroy.mir
new file mode 100644
index 0000000..7a7edcb
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/mir/remove-frame-destroy.mir
@@ -0,0 +1,24 @@
+# REQUIRES: amdgpu-registered-target
+# RUN: llvm-reduce --delta-passes=instructions -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log
+# RUN: FileCheck --check-prefix=RESULT %s < %t
+
+# CHECK-INTERESTINGNESS: S_NOP 0
+
+# RESULT: ADJCALLSTACKUP
+# RESULT-NEXT: ADJCALLSTACKDOWN
+# RESULT-NEXT: S_ENDPGM 0
+
+---
+name: frame_setup_destroy
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    S_NOP 0
+    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    S_NOP 0
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    S_ENDPGM 0
+
+...
diff --git a/llvm/test/tools/llvm-reduce/mir/subreg-def0.mir b/llvm/test/tools/llvm-reduce/mir/subreg-def0.mir
index e37ab45..7a44956 100644
--- a/llvm/test/tools/llvm-reduce/mir/subreg-def0.mir
+++ b/llvm/test/tools/llvm-reduce/mir/subreg-def0.mir
@@ -4,7 +4,8 @@
 
 # CHECK-INTERESTINGNESS: V_ADD_U32
 
-# RESULT: S_WAITCNT 0, implicit-def undef %2.sub1, implicit-def %3.sub0
+# RESULT: undef %2.sub1:vreg_64 = IMPLICIT_DEF
+# RESULT-NEXT: %3.sub0:vreg_64 = IMPLICIT_DEF
 # RESULT-NEXT: %1:vgpr_32 = V_ADD_U32_e32 %2.sub0, %2.sub1, implicit $exec
 # RESULT-NEXT: S_ENDPGM 0, implicit %1
 
diff --git a/llvm/test/tools/llvm-reduce/mir/subreg-def1.mir b/llvm/test/tools/llvm-reduce/mir/subreg-def1.mir
index 8be7f64..067fbf4 100644
--- a/llvm/test/tools/llvm-reduce/mir/subreg-def1.mir
+++ b/llvm/test/tools/llvm-reduce/mir/subreg-def1.mir
@@ -5,11 +5,10 @@
 # CHECK-INTERESTINGNESS: %{{[0-9]+}}.sub0:vreg_64 = V_ADD_U32_e32 %{{[0-9]+}}.sub1, %{{[0-9]+}}.sub0, implicit $exec
 # CHECK-INTERESTINGNESS: %{{[0-9]+}}.sub0:vreg_64 = V_ADD_U32_e32 4, %{{[0-9]+}}.sub0, implicit $exec
 
-# RESULT: S_WAITCNT 0, implicit-def undef %2.sub1
-# RESULT-NEXT: %{{[0-9]+}}.sub0:vreg_64 = V_ADD_U32_e32 %{{[0-9]+}}.sub1, %{{[0-9]+}}.sub0, implicit $exec
-# RESULT-NEXT: %{{[0-9]+}}.sub0:vreg_64 = V_ADD_U32_e32 4, %{{[0-9]+}}.sub0, implicit $exec
-# RESULT-NEXT: S_ENDPGM 0, implicit %{{[0-9]+}}, implicit %{{[0-9]+}}.sub0
-
+# RESULT: undef %2.sub1:vreg_64 = IMPLICIT_DEF
+# RESULT-NEXT: %0.sub0:vreg_64 = V_ADD_U32_e32 %2.sub1, %2.sub0, implicit $exec
+# RESULT-NEXT: %1.sub0:vreg_64 = V_ADD_U32_e32 4, %2.sub0, implicit $exec
+# RESULT-NEXT: S_ENDPGM 0, implicit %2, implicit %2.sub0
 ---
 name:            f
 tracksRegLiveness: true
diff --git a/llvm/tools/llvm-ifs/llvm-ifs.cpp b/llvm/tools/llvm-ifs/llvm-ifs.cpp
index bf81a60..1ac8c0d 100644
--- a/llvm/tools/llvm-ifs/llvm-ifs.cpp
+++ b/llvm/tools/llvm-ifs/llvm-ifs.cpp
@@ -429,6 +429,9 @@ int main(int argc, char *argv[]) {
   if (StripNeededLibs)
     Stub.NeededLibs.clear();
 
+  if (Error E = filterIFSSyms(Stub, StripUndefined, ExcludeSyms))
+    fatalError(std::move(E));
+
   if (OutputELFFilePath.getNumOccurrences() == 0 &&
       OutputIFSFilePath.getNumOccurrences() == 0 &&
       OutputTBDFilePath.getNumOccurrences() == 0) {
@@ -485,8 +488,6 @@ int main(int argc, char *argv[]) {
         stripIFSTarget(Stub, StripIFSTarget, StripIFSArch,
                        StripIFSEndiannessWidth, StripIFSBitWidth);
       }
-      if (Error E = filterIFSSyms(Stub, StripUndefined, ExcludeSyms))
-        fatalError(std::move(E));
       Error IFSWriteError = writeIFS(OutputFilePath.getValue(), Stub);
       if (IFSWriteError)
         fatalError(std::move(IFSWriteError));
@@ -537,8 +538,6 @@ int main(int argc, char *argv[]) {
         stripIFSTarget(Stub, StripIFSTarget, StripIFSArch,
                        StripIFSEndiannessWidth, StripIFSBitWidth);
       }
-      if (Error E = filterIFSSyms(Stub, StripUndefined, ExcludeSyms))
-        fatalError(std::move(E));
       Error IFSWriteError = writeIFS(OutputIFSFilePath.getValue(), Stub);
       if (IFSWriteError)
         fatalError(std::move(IFSWriteError));
diff --git a/llvm/tools/llvm-reduce/CMakeLists.txt b/llvm/tools/llvm-reduce/CMakeLists.txt
index 6588fd8..88229d6 100644
--- a/llvm/tools/llvm-reduce/CMakeLists.txt
+++ b/llvm/tools/llvm-reduce/CMakeLists.txt
@@ -39,6 +39,7 @@ add_llvm_tool(llvm-reduce
   deltas/ReduceOperandsSkip.cpp
   deltas/ReduceOperandsToArgs.cpp
   deltas/ReduceInstructionsMIR.cpp
+  deltas/ReduceIRReferences.cpp
   llvm-reduce.cpp
 
   DEPENDS
diff --git a/llvm/tools/llvm-reduce/DeltaManager.cpp b/llvm/tools/llvm-reduce/DeltaManager.cpp
index 4abdf38..d3e8740 100644
--- a/llvm/tools/llvm-reduce/DeltaManager.cpp
+++ b/llvm/tools/llvm-reduce/DeltaManager.cpp
@@ -24,6 +24,7 @@
 #include "deltas/ReduceGlobalValues.h"
 #include "deltas/ReduceGlobalVarInitializers.h"
 #include "deltas/ReduceGlobalVars.h"
+#include "deltas/ReduceIRReferences.h"
 #include "deltas/ReduceInstructions.h"
 #include "deltas/ReduceInstructionsMIR.h"
 #include "deltas/ReduceMetadata.h"
@@ -67,7 +68,11 @@ static cl::opt<std::string>
   DELTA_PASS("module-data", reduceModuleDataDeltaPass)
 
 #define DELTA_PASSES_MIR                                                       \
-  DELTA_PASS("instructions", reduceInstructionsMIRDeltaPass)
+  DELTA_PASS("instructions", reduceInstructionsMIRDeltaPass)                   \
+  DELTA_PASS("ir-instruction-references",                                      \
+             reduceIRInstructionReferencesDeltaPass)                           \
+  DELTA_PASS("ir-block-references", reduceIRBlockReferencesDeltaPass)          \
+  DELTA_PASS("ir-function-references", reduceIRFunctionReferencesDeltaPass)
 
 static void runAllDeltaPasses(TestRunner &Tester) {
 #define DELTA_PASS(NAME, FUNC) FUNC(Tester);
@@ -105,18 +110,8 @@ void llvm::printDeltaPasses(raw_ostream &OS) {
 #undef DELTA_PASS
 }
 
-// FIXME: We might want to use a different metric than "number of
-// bytes in serialized IR" to detect non-progress of the main delta
-// loop
-static int getIRSize(TestRunner &Tester) {
-  std::string Str;
-  raw_string_ostream SS(Str);
-  Tester.getProgram().print(SS, /*AnnotationWriter=*/nullptr);
-  return Str.length();
-}
-
 void llvm::runDeltaPasses(TestRunner &Tester, int MaxPassIterations) {
-  int OldSize = getIRSize(Tester);
+  uint64_t OldComplexity = Tester.getProgram().getComplexityScore();
   for (int Iter = 0; Iter < MaxPassIterations; ++Iter) {
     if (DeltaPasses.empty()) {
       runAllDeltaPasses(Tester);
@@ -128,9 +123,9 @@ void llvm::runDeltaPasses(TestRunner &Tester, int MaxPassIterations) {
         Passes = Split.second;
       }
     }
-    int NewSize = getIRSize(Tester);
-    if (NewSize >= OldSize)
+    uint64_t NewComplexity = Tester.getProgram().getComplexityScore();
+    if (NewComplexity >= OldComplexity)
       break;
-    OldSize = NewSize;
+    OldComplexity = NewComplexity;
   }
 }
diff --git a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp
index 01786a4..91ea752 100644
--- a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp
+++ b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp
@@ -473,3 +473,80 @@ void ReducerWorkItem::print(raw_ostream &ROS, void *p) const {
              /*ShouldPreserveUseListOrder=*/true);
   }
 }
+
+// FIXME: We might want to use a different metric than "number of
+// bytes in serialized IR" to detect non-progress of the main delta
+// loop
+uint64_t ReducerWorkItem::getIRSize() const {
+  std::string Str;
+  raw_string_ostream SS(Str);
+  print(SS, /*AnnotationWriter=*/nullptr);
+  return Str.length();
+}
+
+/// Try to produce some number that indicates a function is getting smaller /
+/// simpler.
+static uint64_t computeMIRComplexityScoreImpl(const MachineFunction &MF) {
+  uint64_t Score = 0;
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  // Add for stack objects
+  Score += MFI.getNumObjects();
+
+  // Add in the block count.
+  Score += 2 * MF.size();
+
+  for (const MachineBasicBlock &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      const unsigned Opc = MI.getOpcode();
+
+      // Reductions may want or need to introduce implicit_defs, so don't count
+      // them.
+      // TODO: These probably should count in some way.
+      if (Opc == TargetOpcode::IMPLICIT_DEF ||
+          Opc == TargetOpcode::G_IMPLICIT_DEF)
+        continue;
+
+      // Each instruction adds to the score
+      Score += 4;
+
+      if (Opc == TargetOpcode::PHI || Opc == TargetOpcode::G_PHI ||
+          Opc == TargetOpcode::INLINEASM || Opc == TargetOpcode::INLINEASM_BR)
+        ++Score;
+
+      if (MI.getFlags() != 0)
+        ++Score;
+
+      // Increase weight for more operands.
+      for (const MachineOperand &MO : MI.operands()) {
+        ++Score;
+
+        // Treat registers as more complex.
+        if (MO.isReg()) {
+          ++Score;
+
+          // And subregisters as even more complex.
+          if (MO.getSubReg()) {
+            ++Score;
+            if (MO.isDef())
+              ++Score;
+          }
+        } else if (MO.isRegMask())
+          ++Score;
+      }
+    }
+  }
+
+  return Score;
+}
+
+uint64_t ReducerWorkItem::computeMIRComplexityScore() const {
+  uint64_t Score = 0;
+
+  for (const Function &F : getModule()) {
+    if (auto *MF = MMI->getMachineFunction(F))
+      Score += computeMIRComplexityScoreImpl(*MF);
+  }
+
+  return Score;
+}
diff --git a/llvm/tools/llvm-reduce/ReducerWorkItem.h b/llvm/tools/llvm-reduce/ReducerWorkItem.h
index d20ca18..89330d2b 100644
--- a/llvm/tools/llvm-reduce/ReducerWorkItem.h
+++ b/llvm/tools/llvm-reduce/ReducerWorkItem.h
@@ -27,6 +27,15 @@ public:
 
   void print(raw_ostream &ROS, void *p = nullptr) const;
   operator Module &() const { return *M; }
+
+  /// Return a number to indicate whether there was any reduction progress.
+  uint64_t getComplexityScore() const {
+    return isMIR() ? computeMIRComplexityScore() : getIRSize();
+  }
+
+private:
+  uint64_t computeMIRComplexityScore() const;
+  uint64_t getIRSize() const;
 };
 
 std::unique_ptr<ReducerWorkItem>
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.cpp b/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.cpp
new file mode 100644
index 0000000..975bdc2
--- /dev/null
+++ b/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.cpp
@@ -0,0 +1,82 @@
+//===- ReduceIRReferences.cpp - Specialized Delta Pass --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to remove backreferences to the IR from MIR. In particular, this will remove
+// the Value references in MachineMemOperands.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ReduceIRReferences.h"
+#include "Delta.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+using namespace llvm;
+
+static void dropIRReferencesFromInstructions(Oracle &O, MachineFunction &MF) {
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      if (!O.shouldKeep()) {
+        for (MachineMemOperand *MMO : MI.memoperands()) {
+          // Leave behind pseudo source values.
+          // TODO: Removing all MemOperand values is a further reduction step.
+          if (MMO->getPointerInfo().V.is<const Value *>())
+            MMO->setValue(static_cast<const Value *>(nullptr));
+        }
+
+        // TODO: Try to remove GlobalValue references and metadata
+      }
+    }
+  }
+}
+
+static void stripIRFromInstructions(Oracle &O, ReducerWorkItem &WorkItem) {
+  for (const Function &F : WorkItem.getModule()) {
+    if (auto *MF = WorkItem.MMI->getMachineFunction(F))
+      dropIRReferencesFromInstructions(O, *MF);
+  }
+}
+
+static void stripIRFromBlocks(Oracle &O, ReducerWorkItem &WorkItem) {
+  for (const Function &F : WorkItem.getModule()) {
+    if (auto *MF = WorkItem.MMI->getMachineFunction(F)) {
+      for (MachineBasicBlock &MBB : *MF) {
+        if (!O.shouldKeep())
+          MBB.clearBasicBlock();
+      }
+    }
+  }
+}
+
+static void stripIRFromFunctions(Oracle &O, ReducerWorkItem &WorkItem) {
+  for (const Function &F : WorkItem.getModule()) {
+    if (!O.shouldKeep()) {
+      if (auto *MF = WorkItem.MMI->getMachineFunction(F)) {
+        MachineFrameInfo &MFI = MF->getFrameInfo();
+        for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
+             I != E; ++I)
+          MFI.clearObjectAllocation(I);
+      }
+    }
+  }
+}
+
+void llvm::reduceIRInstructionReferencesDeltaPass(TestRunner &Test) {
+  outs() << "*** Reducing IR references from instructions...\n";
+  runDeltaPass(Test, stripIRFromInstructions);
+}
+
+void llvm::reduceIRBlockReferencesDeltaPass(TestRunner &Test) {
+  outs() << "*** Reducing IR references from blocks...\n";
+  runDeltaPass(Test, stripIRFromBlocks);
+}
+
+void llvm::reduceIRFunctionReferencesDeltaPass(TestRunner &Test) {
+  outs() << "*** Reducing IR references from functions...\n";
+  runDeltaPass(Test, stripIRFromFunctions);
+}
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.h b/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.h
new file mode 100644
index 0000000..548559a
--- /dev/null
+++ b/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.h
@@ -0,0 +1,31 @@
+//===- ReduceIRReferences.h  - Specialized Delta Pass -----------*- c++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce uninteresting IR references from the MachineFunction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEIRREFERENCES_MIR_H
+#define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEIRREFERENCES_MIR_H
+
+namespace llvm {
+class TestRunner;
+
+/// Remove IR references from instructions (i.e. from memory operands)
+void reduceIRInstructionReferencesDeltaPass(TestRunner &Test);
+
+/// Remove IR BasicBlock references (the block names)
+void reduceIRBlockReferencesDeltaPass(TestRunner &Test);
+
+/// Remove IR references from function level fields (e.g. frame object names)
+void reduceIRFunctionReferencesDeltaPass(TestRunner &Test);
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.cpp b/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.cpp
index 3be2eae..f5486cf 100644
--- a/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.cpp
+++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.cpp
@@ -45,6 +45,21 @@ static Register getPrevDefOfRCInMBB(MachineBasicBlock &MBB,
   return 0;
 }
 
+static bool shouldNotRemoveInstruction(const TargetInstrInfo &TII,
+                                       const MachineInstr &MI) {
+  if (MI.isTerminator())
+    return true;
+
+  // The MIR is almost certainly going to be invalid if frame instructions are
+  // deleted individually since they need to come in balanced pairs, so don't
+  // try to delete them.
+  if (MI.getOpcode() == TII.getCallFrameSetupOpcode() ||
+      MI.getOpcode() == TII.getCallFrameDestroyOpcode())
+    return true;
+
+  return false;
+}
+
 static void extractInstrFromFunction(Oracle &O, MachineFunction &MF) {
   MachineDominatorTree MDT;
   MDT.runOnMachineFunction(MF);
@@ -52,17 +67,17 @@ static void extractInstrFromFunction(Oracle &O, MachineFunction &MF) {
   auto MRI = &MF.getRegInfo();
   SetVector<MachineInstr *> ToDelete;
 
-  MachineInstr *TopMI = nullptr;
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  const TargetInstrInfo *TII = STI.getInstrInfo();
+  MachineBasicBlock *EntryMBB = &*MF.begin();
+  MachineBasicBlock::iterator EntryInsPt =
+      EntryMBB->SkipPHIsLabelsAndDebug(EntryMBB->begin());
 
   // Mark MIs for deletion according to some criteria.
   for (auto &MBB : MF) {
     for (auto &MI : MBB) {
-      if (MI.isTerminator())
-        continue;
-      if (MBB.isEntryBlock() && !TopMI) {
-        TopMI = &MI;
+      if (shouldNotRemoveInstruction(*TII, MI))
         continue;
-      }
       if (!O.shouldKeep())
         ToDelete.insert(&MI);
     }
@@ -101,19 +116,15 @@ static void extractInstrFromFunction(Oracle &O, MachineFunction &MF) {
         }
       }
 
-      // If no dominating definition was found then add an implicit one to the
-      // first instruction in the entry block.
-
-      // FIXME: This should really insert IMPLICIT_DEF or G_IMPLICIT_DEF. We
-      // need to refine the reduction quality metric from number of serialized
-      // bytes to continue progressing if we're going to introduce new
-      // instructions.
-      if (!NewReg && TopMI) {
+      // If no dominating definition was found then add an implicit def to the
+      // top of the entry block.
+      if (!NewReg) {
         NewReg = MRI->cloneVirtualRegister(Reg);
-        TopMI->addOperand(MachineOperand::CreateReg(
-            NewReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/,
-            MO.isDead(), MO.isUndef(), MO.isEarlyClobber(), MO.getSubReg(),
-            /*IsDebug*/ false, MO.isInternalRead()));
+        bool IsGeneric = MRI->getRegClassOrNull(Reg) == nullptr;
+        unsigned ImpDef = IsGeneric ? TargetOpcode::G_IMPLICIT_DEF
+                                    : TargetOpcode::IMPLICIT_DEF;
+        BuildMI(*EntryMBB, EntryInsPt, DebugLoc(), TII->get(ImpDef))
+          .addReg(NewReg, getRegState(MO), MO.getSubReg());
       }
 
       // Update all uses.
diff --git a/llvm/unittests/Object/CMakeLists.txt b/llvm/unittests/Object/CMakeLists.txt
index ea7df22..d30125d 100644
--- a/llvm/unittests/Object/CMakeLists.txt
+++ b/llvm/unittests/Object/CMakeLists.txt
@@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_unittest(ObjectTests
   ArchiveTest.cpp
+  DXContainerTest.cpp
   ELFObjectFileTest.cpp
   ELFTypesTest.cpp
   ELFTest.cpp
diff --git a/llvm/unittests/Object/DXContainerTest.cpp b/llvm/unittests/Object/DXContainerTest.cpp
new file mode 100644
index 0000000..95bafe8
--- /dev/null
+++ b/llvm/unittests/Object/DXContainerTest.cpp
@@ -0,0 +1,48 @@
+//===- DXContainerTest.cpp - Tests for DXContainerFile --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/DXContainer.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+template <std::size_t X> MemoryBufferRef getMemoryBuffer(uint8_t Data[X]) {
+  StringRef Obj(reinterpret_cast<char *>(&Data[0]), X);
+  return MemoryBufferRef(Obj, "");
+}
+
+TEST(DXCFile, IdentifyMagic) {
+  StringRef Buffer("DXBC");
+  EXPECT_EQ(identify_magic(Buffer), file_magic::dxcontainer_object);
+}
+
+TEST(DXCFile, ParseHeaderErrors) {
+  uint8_t Buffer[] = {0x44, 0x58, 0x42, 0x43};
+  EXPECT_THAT_EXPECTED(
+      DXContainer::create(getMemoryBuffer<4>(Buffer)),
+      FailedWithMessage("Reading structure out of file bounds"));
+}
+
+TEST(DXCFile, ParseHeader) {
+  uint8_t Buffer[] = {0x44, 0x58, 0x42, 0x43, 0x00, 0x00, 0x00, 0x00,
+                      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                      0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+                      0x70, 0x0D, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00};
+  DXContainer C =
+      llvm::cantFail(DXContainer::create(getMemoryBuffer<32>(Buffer)));
+  EXPECT_TRUE(memcmp(C.getHeader().Magic, "DXBC", 4) == 0);
+  EXPECT_TRUE(memcmp(C.getHeader().FileHash.Digest,
+                     "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16) == 0);
+  EXPECT_EQ(C.getHeader().Version.Major, 1u);
+  EXPECT_EQ(C.getHeader().Version.Minor, 0u);
+}
diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp
index dd02d92..ec9901a 100644
--- a/llvm/unittests/Support/CommandLineTest.cpp
+++ b/llvm/unittests/Support/CommandLineTest.cpp
@@ -237,12 +237,42 @@ TEST(CommandLineTest, TokenizeWindowsCommandLine2) {
 }
 
 TEST(CommandLineTest, TokenizeWindowsCommandLineQuotedLastArgument) {
+  // Whitespace at the end of the command line doesn't cause an empty last word
+  const char Input0[] = R"(a b c d )";
+  const char *const Output0[] = {"a", "b", "c", "d"};
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input0, Output0);
+
+  // But an explicit "" does
   const char Input1[] = R"(a b c d "")";
   const char *const Output1[] = {"a", "b", "c", "d", ""};
   testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input1, Output1);
+
+  // An unterminated quoted string is also emitted as an argument word, empty
+  // or not
   const char Input2[] = R"(a b c d ")";
-  const char *const Output2[] = {"a", "b", "c", "d"};
+  const char *const Output2[] = {"a", "b", "c", "d", ""};
   testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input2, Output2);
+  const char Input3[] = R"(a b c d "text)";
+  const char *const Output3[] = {"a", "b", "c", "d", "text"};
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input3, Output3);
+}
+
+TEST(CommandLineTest, TokenizeWindowsCommandLineExeName) {
+  const char Input1[] =
+      R"("C:\Program Files\Whatever\"clang.exe z.c -DY=\"x\")";
+  const char *const Output1[] = {"C:\\Program Files\\Whatever\\clang.exe",
+                                 "z.c", "-DY=\"x\""};
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLineFull, Input1, Output1);
+
+  const char Input2[] = "\"a\\\"b c\\\"d\n\"e\\\"f g\\\"h\n";
+  const char *const Output2[] = {"a\\b", "c\"d", nullptr,
+                                 "e\\f", "g\"h", nullptr};
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLineFull, Input2, Output2,
+                           /*MarkEOLs=*/true);
+
+  const char Input3[] = R"(\\server\share\subdir\clang.exe)";
+  const char *const Output3[] = {"\\\\server\\share\\subdir\\clang.exe"};
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLineFull, Input3, Output3);
 }
 
 TEST(CommandLineTest, TokenizeAndMarkEOLs) {
diff --git a/llvm/unittests/Support/Host.cpp b/llvm/unittests/Support/Host.cpp
index 484c88b..590d2d7 100644
--- a/llvm/unittests/Support/Host.cpp
+++ b/llvm/unittests/Support/Host.cpp
@@ -133,6 +133,10 @@ TEST(getLinuxHostCPUName, AArch64) {
                                               "CPU part        : 0xc01"),
             "saphira");
 
+  EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0xc0\n"
+                                              "CPU part        : 0xac3"),
+            "ampere1");
+
   // MSM8992/4 weirdness
   StringRef MSM8992ProcCpuInfo = R"(
 Processor       : AArch64 Processor rev 3 (aarch64)
diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp
index 186fe57..e9d6a3f 100644
--- a/llvm/unittests/Support/TargetParserTest.cpp
+++ b/llvm/unittests/Support/TargetParserTest.cpp
@@ -1195,6 +1195,14 @@ INSTANTIATE_TEST_SUITE_P(
                              AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
                              AArch64::AEK_BF16 | AArch64::AEK_I8MM,
                          "8.5-A"),
+        ARMCPUTestParams("ampere1", "armv8.6-a", "crypto-neon-fp-armv8",
+                         AArch64::AEK_CRC  | AArch64::AEK_FP   | AArch64::AEK_FP16   |
+                             AArch64::AEK_SIMD | AArch64::AEK_RAS  | AArch64::AEK_LSE     |
+                             AArch64::AEK_RDM  | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+                             AArch64::AEK_SM4  | AArch64::AEK_SHA3 | AArch64::AEK_BF16    |
+                             AArch64::AEK_SHA2 | AArch64::AEK_AES  | AArch64::AEK_I8MM    |
+                             AArch64::AEK_MTE  | AArch64::AEK_SSBS | AArch64::AEK_SB,
+                         "8.6-A"),
         ARMCPUTestParams(
             "neoverse-512tvb", "armv8.4-a", "crypto-neon-fp-armv8",
             AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS |
@@ -1256,7 +1264,7 @@ INSTANTIATE_TEST_SUITE_P(
                              AArch64::AEK_LSE | AArch64::AEK_RDM,
                          "8.2-A")));
 
-static constexpr unsigned NumAArch64CPUArchs = 53;
+static constexpr unsigned NumAArch64CPUArchs = 54;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector<StringRef, NumAArch64CPUArchs> List;
diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt
index 87f86dc..af77744 100644
--- a/llvm/utils/TableGen/CMakeLists.txt
+++ b/llvm/utils/TableGen/CMakeLists.txt
@@ -22,13 +22,13 @@ add_tablegen(llvm-tblgen LLVM
   DAGISelMatcherGen.cpp
   DAGISelMatcherOpt.cpp
   DAGISelMatcher.cpp
+  DecoderEmitter.cpp
   DFAEmitter.cpp
   DFAPacketizerEmitter.cpp
   DirectiveEmitter.cpp
   DisassemblerEmitter.cpp
   ExegesisEmitter.cpp
   FastISelEmitter.cpp
-  FixedLenDecoderEmitter.cpp
   GICombinerEmitter.cpp
   GlobalISelEmitter.cpp
   InfoByHwMode.cpp
diff --git a/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 33be554..22bfc1f 100644
--- a/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -1,4 +1,4 @@
-//===------------ FixedLenDecoderEmitter.cpp - Decoder Generator ----------===//
+//===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -7,13 +7,14 @@
 //===----------------------------------------------------------------------===//
 //
 // It contains the tablegen backend that emits the decoder functions for
-// targets with fixed length instruction set.
+// targets with fixed/variable length instruction set.
 //
 //===----------------------------------------------------------------------===//
 
 #include "CodeGenInstruction.h"
 #include "CodeGenTarget.h"
 #include "InfoByHwMode.h"
+#include "VarLenCodeEmitterGen.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/CachedHashString.h"
@@ -121,19 +122,18 @@ raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
   return OS;
 }
 
-class FixedLenDecoderEmitter {
+class DecoderEmitter {
   RecordKeeper &RK;
   std::vector<EncodingAndInst> NumberedEncodings;
 
 public:
   // Defaults preserved here for documentation, even though they aren't
   // strictly necessary given the way that this is currently being called.
-  FixedLenDecoderEmitter(RecordKeeper &R, std::string PredicateNamespace,
-                         std::string GPrefix = "if (",
-                         std::string GPostfix = " == MCDisassembler::Fail)",
-                         std::string ROK = "MCDisassembler::Success",
-                         std::string RFail = "MCDisassembler::Fail",
-                         std::string L = "")
+  DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace,
+                 std::string GPrefix = "if (",
+                 std::string GPostfix = " == MCDisassembler::Fail)",
+                 std::string ROK = "MCDisassembler::Success",
+                 std::string RFail = "MCDisassembler::Fail", std::string L = "")
       : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)),
         GuardPrefix(std::move(GPrefix)), GuardPostfix(std::move(GPostfix)),
         ReturnOK(std::move(ROK)), ReturnFail(std::move(RFail)),
@@ -143,6 +143,8 @@ public:
   void emitTable(formatted_raw_ostream &o, DecoderTable &Table,
                  unsigned Indentation, unsigned BitWidth,
                  StringRef Namespace) const;
+  void emitInstrLenTable(formatted_raw_ostream &OS,
+                         std::vector<unsigned> &InstrLen) const;
   void emitPredicateFunction(formatted_raw_ostream &OS,
                              PredicateSet &Predicates,
                              unsigned Indentation) const;
@@ -217,8 +219,28 @@ static void dumpBits(raw_ostream &o, const BitsInit &bits) {
 }
 
 static BitsInit &getBitsField(const Record &def, StringRef str) {
-  BitsInit *bits = def.getValueAsBitsInit(str);
-  return *bits;
+  const RecordVal *RV = def.getValue(str);
+  if (BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue()))
+    return *Bits;
+
+  // variable length instruction
+  VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV);
+  SmallVector<Init *, 16> Bits;
+
+  for (auto &SI : VLI) {
+    if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) {
+      for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) {
+        Bits.push_back(BI->getBit(Idx));
+      }
+    } else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value)) {
+      Bits.push_back(const_cast<BitInit *>(BI));
+    } else {
+      for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx)
+        Bits.push_back(UnsetInit::get());
+    }
+  }
+
+  return *BitsInit::get(Bits);
 }
 
 // Representation of the instruction to work on.
@@ -388,13 +410,13 @@ protected:
   unsigned BitWidth;
 
   // Parent emitter
-  const FixedLenDecoderEmitter *Emitter;
+  const DecoderEmitter *Emitter;
 
 public:
   FilterChooser(ArrayRef<EncodingAndInst> Insts,
                 const std::vector<EncodingIDAndOpcode> &IDs,
                 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
-                unsigned BW, const FixedLenDecoderEmitter *E)
+                unsigned BW, const DecoderEmitter *E)
       : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
         FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1),
         BitWidth(BW), Emitter(E) {
@@ -421,7 +443,8 @@ protected:
   // Populates the insn given the uid.
   void insnWithID(insn_t &Insn, unsigned Opcode) const {
     BitsInit &Bits = getBitsField(*AllInstructions[Opcode].EncodingDef, "Inst");
-
+    Insn.resize(BitWidth > Bits.getNumBits() ? BitWidth : Bits.getNumBits(),
+                BIT_UNSET);
     // We may have a SoftFail bitmask, which specifies a mask where an encoding
     // may differ from the value in "Inst" and yet still be valid, but the
     // disassembler should return SoftFail instead of Success.
@@ -430,11 +453,11 @@ protected:
     const RecordVal *RV =
         AllInstructions[Opcode].EncodingDef->getValue("SoftFail");
     const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
-    for (unsigned i = 0; i < BitWidth; ++i) {
+    for (unsigned i = 0; i < Bits.getNumBits(); ++i) {
       if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE)
-        Insn.push_back(BIT_UNSET);
+        Insn[i] = BIT_UNSET;
       else
-        Insn.push_back(bitFromBits(Bits, i));
+        Insn[i] = bitFromBits(Bits, i);
     }
   }
 
@@ -747,11 +770,9 @@ unsigned Filter::usefulness() const {
 //////////////////////////////////
 
 // Emit the decoder state machine table.
-void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
-                                       DecoderTable &Table,
-                                       unsigned Indentation,
-                                       unsigned BitWidth,
-                                       StringRef Namespace) const {
+void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
+                               unsigned Indentation, unsigned BitWidth,
+                               StringRef Namespace) const {
   OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace
     << BitWidth << "[] = {\n";
 
@@ -936,9 +957,18 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
   OS.indent(Indentation) << "};\n\n";
 }
 
-void FixedLenDecoderEmitter::
-emitPredicateFunction(formatted_raw_ostream &OS, PredicateSet &Predicates,
-                      unsigned Indentation) const {
+void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
+                                       std::vector<unsigned> &InstrLen) const {
+  OS << "static const uint8_t InstrLenTable[] = {\n";
+  for (unsigned &Len : InstrLen) {
+    OS << Len << ",\n";
+  }
+  OS << "};\n\n";
+}
+
+void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
+                                           PredicateSet &Predicates,
+                                           unsigned Indentation) const {
   // The predicate function is just a big switch statement based on the
   // input predicate index.
   OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, "
@@ -961,9 +991,9 @@ emitPredicateFunction(formatted_raw_ostream &OS, PredicateSet &Predicates,
   OS.indent(Indentation) << "}\n\n";
 }
 
-void FixedLenDecoderEmitter::
-emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
-                    unsigned Indentation) const {
+void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
+                                         DecoderSet &Decoders,
+                                         unsigned Indentation) const {
   // The decoder function is just a big switch statement based on the
   // input decoder index.
   OS.indent(Indentation) << "template <typename InsnType>\n";
@@ -1787,11 +1817,9 @@ void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
   }
 }
 
-static std::string findOperandDecoderMethod(TypedInit *TI) {
+static std::string findOperandDecoderMethod(Record *Record) {
   std::string Decoder;
 
-  Record *Record = cast<DefInit>(TI)->getDef();
-
   RecordVal *DecoderString = Record->getValue("DecoderMethod");
   StringInit *String = DecoderString ?
     dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
@@ -1814,17 +1842,88 @@ static std::string findOperandDecoderMethod(TypedInit *TI) {
   return Decoder;
 }
 
-static bool
+OperandInfo getOpInfo(Record *TypeRecord) {
+  std::string Decoder = findOperandDecoderMethod(TypeRecord);
+
+  RecordVal *HasCompleteDecoderVal = TypeRecord->getValue("hasCompleteDecoder");
+  BitInit *HasCompleteDecoderBit =
+      HasCompleteDecoderVal
+          ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
+          : nullptr;
+  bool HasCompleteDecoder =
+      HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
+
+  return OperandInfo(Decoder, HasCompleteDecoder);
+}
+
+void parseVarLenInstOperand(const Record &Def,
+                            std::vector<OperandInfo> &Operands,
+                            const CodeGenInstruction &CGI) {
+
+  const RecordVal *RV = Def.getValue("Inst");
+  VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
+  SmallVector<int> TiedTo;
+
+  for (unsigned Idx = 0; Idx < CGI.Operands.size(); ++Idx) {
+    auto &Op = CGI.Operands[Idx];
+    if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
+      for (auto *Arg : Op.MIOperandInfo->getArgs())
+        Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef()));
+    else
+      Operands.push_back(getOpInfo(Op.Rec));
+
+    int TiedReg = Op.getTiedRegister();
+    TiedTo.push_back(-1);
+    if (TiedReg != -1) {
+      TiedTo[Idx] = TiedReg;
+      TiedTo[TiedReg] = Idx;
+    }
+  }
+
+  unsigned CurrBitPos = 0;
+  for (auto &EncodingSegment : VLI) {
+    unsigned Offset = 0;
+    StringRef OpName;
+
+    if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) {
+      OpName = SI->getValue();
+    } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) {
+      OpName = cast<StringInit>(DI->getArg(0))->getValue();
+      Offset = cast<IntInit>(DI->getArg(2))->getValue();
+    }
+
+    if (!OpName.empty()) {
+      auto OpSubOpPair =
+          const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
+              OpName);
+      unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair);
+      Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
+
+      int TiedReg = TiedTo[OpSubOpPair.first];
+      if (TiedReg != -1) {
+        unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
+            std::make_pair(TiedReg, OpSubOpPair.second));
+        Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
+      }
+    }
+
+    CurrBitPos += EncodingSegment.BitWidth;
+  }
+}
+
+static unsigned
 populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
                     const CodeGenInstruction &CGI, unsigned Opc,
-                    std::map<unsigned, std::vector<OperandInfo>> &Operands) {
+                    std::map<unsigned, std::vector<OperandInfo>> &Operands,
+                    bool IsVarLenInst) {
   const Record &Def = *CGI.TheDef;
   // If all the bit positions are not specified; do not decode this instruction.
   // We are bound to fail!  For proper disassembly, the well-known encoding bits
   // of the instruction must be fully specified.
 
   BitsInit &Bits = getBitsField(EncodingDef, "Inst");
-  if (Bits.allInComplete()) return false;
+  if (Bits.allInComplete())
+    return 0;
 
   std::vector<OperandInfo> InsnOperands;
 
@@ -1836,7 +1935,7 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
     InsnOperands.push_back(
         OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder));
     Operands[Opc] = InsnOperands;
-    return true;
+    return Bits.getNumBits();
   }
 
   // Generate a description of the operand of the instruction that we know
@@ -1850,11 +1949,11 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
   DagInit *Out  = Def.getValueAsDag("OutOperandList");
   DagInit *In  = Def.getValueAsDag("InOperandList");
   for (unsigned i = 0; i < Out->getNumArgs(); ++i)
-    InOutOperands.push_back(std::make_pair(Out->getArg(i),
-                                           Out->getArgNameStr(i)));
+    InOutOperands.push_back(
+        std::make_pair(Out->getArg(i), Out->getArgNameStr(i)));
   for (unsigned i = 0; i < In->getNumArgs(); ++i)
-    InOutOperands.push_back(std::make_pair(In->getArg(i),
-                                           In->getArgNameStr(i)));
+    InOutOperands.push_back(
+        std::make_pair(In->getArg(i), In->getArgNameStr(i)));
 
   // Search for tied operands, so that we can correctly instantiate
   // operands that are not explicitly represented in the encoding.
@@ -1871,257 +1970,254 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
     }
   }
 
-  std::map<std::string, std::vector<OperandInfo>> NumberedInsnOperands;
-  std::set<std::string> NumberedInsnOperandsNoTie;
-  if (Target.getInstructionSet()->
-        getValueAsBit("decodePositionallyEncodedOperands")) {
-    const std::vector<RecordVal> &Vals = Def.getValues();
-    unsigned NumberedOp = 0;
-
-    std::set<unsigned> NamedOpIndices;
-    if (Target.getInstructionSet()->
-         getValueAsBit("noNamedPositionallyEncodedOperands"))
-      // Collect the set of operand indices that might correspond to named
-      // operand, and skip these when assigning operands based on position.
+  if (IsVarLenInst) {
+    parseVarLenInstOperand(EncodingDef, InsnOperands, CGI);
+  } else {
+    std::map<std::string, std::vector<OperandInfo>> NumberedInsnOperands;
+    std::set<std::string> NumberedInsnOperandsNoTie;
+    if (Target.getInstructionSet()->getValueAsBit(
+            "decodePositionallyEncodedOperands")) {
+      const std::vector<RecordVal> &Vals = Def.getValues();
+      unsigned NumberedOp = 0;
+
+      std::set<unsigned> NamedOpIndices;
+      if (Target.getInstructionSet()->getValueAsBit(
+              "noNamedPositionallyEncodedOperands"))
+        // Collect the set of operand indices that might correspond to named
+        // operand, and skip these when assigning operands based on position.
+        for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+          unsigned OpIdx;
+          if (!CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx))
+            continue;
+
+          NamedOpIndices.insert(OpIdx);
+        }
+
       for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
-        unsigned OpIdx;
-        if (!CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx))
+        // Ignore fixed fields in the record, we're looking for values like:
+        //    bits<5> RST = { ?, ?, ?, ?, ? };
+        if (Vals[i].isNonconcreteOK() || Vals[i].getValue()->isComplete())
           continue;
 
-        NamedOpIndices.insert(OpIdx);
-      }
+        // Determine if Vals[i] actually contributes to the Inst encoding.
+        unsigned bi = 0;
+        for (; bi < Bits.getNumBits(); ++bi) {
+          VarInit *Var = nullptr;
+          VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
+          if (BI)
+            Var = dyn_cast<VarInit>(BI->getBitVar());
+          else
+            Var = dyn_cast<VarInit>(Bits.getBit(bi));
+
+          if (Var && Var->getName() == Vals[i].getName())
+            break;
+        }
 
-    for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
-      // Ignore fixed fields in the record, we're looking for values like:
-      //    bits<5> RST = { ?, ?, ?, ?, ? };
-      if (Vals[i].isNonconcreteOK() || Vals[i].getValue()->isComplete())
-        continue;
+        if (bi == Bits.getNumBits())
+          continue;
 
-      // Determine if Vals[i] actually contributes to the Inst encoding.
-      unsigned bi = 0;
-      for (; bi < Bits.getNumBits(); ++bi) {
-        VarInit *Var = nullptr;
-        VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
-        if (BI)
-          Var = dyn_cast<VarInit>(BI->getBitVar());
-        else
-          Var = dyn_cast<VarInit>(Bits.getBit(bi));
+        // Skip variables that correspond to explicitly-named operands.
+        unsigned OpIdx;
+        if (CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx))
+          continue;
 
-        if (Var && Var->getName() == Vals[i].getName())
-          break;
-      }
+        // Get the bit range for this operand:
+        unsigned bitStart = bi++, bitWidth = 1;
+        for (; bi < Bits.getNumBits(); ++bi) {
+          VarInit *Var = nullptr;
+          VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
+          if (BI)
+            Var = dyn_cast<VarInit>(BI->getBitVar());
+          else
+            Var = dyn_cast<VarInit>(Bits.getBit(bi));
 
-      if (bi == Bits.getNumBits())
-        continue;
+          if (!Var)
+            break;
 
-      // Skip variables that correspond to explicitly-named operands.
-      unsigned OpIdx;
-      if (CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx))
-        continue;
+          if (Var->getName() != Vals[i].getName())
+            break;
 
-      // Get the bit range for this operand:
-      unsigned bitStart = bi++, bitWidth = 1;
-      for (; bi < Bits.getNumBits(); ++bi) {
-        VarInit *Var = nullptr;
-        VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
-        if (BI)
-          Var = dyn_cast<VarInit>(BI->getBitVar());
-        else
-          Var = dyn_cast<VarInit>(Bits.getBit(bi));
+          ++bitWidth;
+        }
 
-        if (!Var)
-          break;
+        unsigned NumberOps = CGI.Operands.size();
+        while (NumberedOp < NumberOps &&
+               (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
+                (!NamedOpIndices.empty() &&
+                 NamedOpIndices.count(
+                     CGI.Operands.getSubOperandNumber(NumberedOp).first))))
+          ++NumberedOp;
+
+        OpIdx = NumberedOp++;
+
+        // OpIdx now holds the ordered operand number of Vals[i].
+        std::pair<unsigned, unsigned> SO =
+            CGI.Operands.getSubOperandNumber(OpIdx);
+        const std::string &Name = CGI.Operands[SO.first].Name;
+
+        LLVM_DEBUG(dbgs() << "Numbered operand mapping for " << Def.getName()
+                          << ": " << Name << "(" << SO.first << ", "
+                          << SO.second << ") => " << Vals[i].getName() << "\n");
+
+        std::string Decoder;
+        Record *TypeRecord = CGI.Operands[SO.first].Rec;
+
+        RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod");
+        StringInit *String =
+            DecoderString ? dyn_cast<StringInit>(DecoderString->getValue())
+                          : nullptr;
+        if (String && String->getValue() != "")
+          Decoder = std::string(String->getValue());
+
+        if (Decoder == "" && CGI.Operands[SO.first].MIOperandInfo &&
+            CGI.Operands[SO.first].MIOperandInfo->getNumArgs()) {
+          Init *Arg = CGI.Operands[SO.first].MIOperandInfo->getArg(SO.second);
+          if (DefInit *DI = cast<DefInit>(Arg))
+            TypeRecord = DI->getDef();
+        }
 
-        if (Var->getName() != Vals[i].getName())
-          break;
+        bool isReg = false;
+        if (TypeRecord->isSubClassOf("RegisterOperand"))
+          TypeRecord = TypeRecord->getValueAsDef("RegClass");
+        if (TypeRecord->isSubClassOf("RegisterClass")) {
+          Decoder = "Decode" + TypeRecord->getName().str() + "RegisterClass";
+          isReg = true;
+        } else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) {
+          Decoder = "DecodePointerLikeRegClass" +
+                    utostr(TypeRecord->getValueAsInt("RegClassKind"));
+          isReg = true;
+        }
 
-        ++bitWidth;
+        DecoderString = TypeRecord->getValue("DecoderMethod");
+        String = DecoderString ? dyn_cast<StringInit>(DecoderString->getValue())
+                               : nullptr;
+        if (!isReg && String && String->getValue() != "")
+          Decoder = std::string(String->getValue());
+
+        RecordVal *HasCompleteDecoderVal =
+            TypeRecord->getValue("hasCompleteDecoder");
+        BitInit *HasCompleteDecoderBit =
+            HasCompleteDecoderVal
+                ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
+                : nullptr;
+        bool HasCompleteDecoder =
+            HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
+
+        OperandInfo OpInfo(Decoder, HasCompleteDecoder);
+        OpInfo.addField(bitStart, bitWidth, 0);
+
+        NumberedInsnOperands[Name].push_back(OpInfo);
+
+        // FIXME: For complex operands with custom decoders we can't handle tied
+        // sub-operands automatically. Skip those here and assume that this is
+        // fixed up elsewhere.
+        if (CGI.Operands[SO.first].MIOperandInfo &&
+            CGI.Operands[SO.first].MIOperandInfo->getNumArgs() > 1 && String &&
+            String->getValue() != "")
+          NumberedInsnOperandsNoTie.insert(Name);
       }
+    }
 
-      unsigned NumberOps = CGI.Operands.size();
-      while (NumberedOp < NumberOps &&
-             (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
-              (!NamedOpIndices.empty() && NamedOpIndices.count(
-                CGI.Operands.getSubOperandNumber(NumberedOp).first))))
-        ++NumberedOp;
-
-      OpIdx = NumberedOp++;
-
-      // OpIdx now holds the ordered operand number of Vals[i].
-      std::pair<unsigned, unsigned> SO =
-        CGI.Operands.getSubOperandNumber(OpIdx);
-      const std::string &Name = CGI.Operands[SO.first].Name;
-
-      LLVM_DEBUG(dbgs() << "Numbered operand mapping for " << Def.getName()
-                        << ": " << Name << "(" << SO.first << ", " << SO.second
-                        << ") => " << Vals[i].getName() << "\n");
-
-      std::string Decoder;
-      Record *TypeRecord = CGI.Operands[SO.first].Rec;
-
-      RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod");
-      StringInit *String = DecoderString ?
-        dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
-      if (String && String->getValue() != "")
-        Decoder = std::string(String->getValue());
-
-      if (Decoder == "" &&
-          CGI.Operands[SO.first].MIOperandInfo &&
-          CGI.Operands[SO.first].MIOperandInfo->getNumArgs()) {
-        Init *Arg = CGI.Operands[SO.first].MIOperandInfo->
-                      getArg(SO.second);
-        if (DefInit *DI = dyn_cast<DefInit>(Arg))
-          TypeRecord = DI->getDef();
+    // For each operand, see if we can figure out where it is encoded.
+    for (const auto &Op : InOutOperands) {
+      if (!NumberedInsnOperands[std::string(Op.second)].empty()) {
+        llvm::append_range(InsnOperands,
+                           NumberedInsnOperands[std::string(Op.second)]);
+        continue;
       }
-
-      bool isReg = false;
-      if (TypeRecord->isSubClassOf("RegisterOperand"))
-        TypeRecord = TypeRecord->getValueAsDef("RegClass");
-      if (TypeRecord->isSubClassOf("RegisterClass")) {
-        Decoder = "Decode" + TypeRecord->getName().str() + "RegisterClass";
-        isReg = true;
-      } else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) {
-        Decoder = "DecodePointerLikeRegClass" +
-                  utostr(TypeRecord->getValueAsInt("RegClassKind"));
-        isReg = true;
+      if (!NumberedInsnOperands[TiedNames[std::string(Op.second)]].empty()) {
+        if (!NumberedInsnOperandsNoTie.count(
+                TiedNames[std::string(Op.second)])) {
+          // Figure out to which (sub)operand we're tied.
+          unsigned i =
+              CGI.Operands.getOperandNamed(TiedNames[std::string(Op.second)]);
+          int tiedTo = CGI.Operands[i].getTiedRegister();
+          if (tiedTo == -1) {
+            i = CGI.Operands.getOperandNamed(Op.second);
+            tiedTo = CGI.Operands[i].getTiedRegister();
+          }
+
+          if (tiedTo != -1) {
+            std::pair<unsigned, unsigned> SO =
+                CGI.Operands.getSubOperandNumber(tiedTo);
+
+            InsnOperands.push_back(
+                NumberedInsnOperands[TiedNames[std::string(Op.second)]]
+                                    [SO.second]);
+          }
+        }
+        continue;
       }
 
-      DecoderString = TypeRecord->getValue("DecoderMethod");
-      String = DecoderString ?
-        dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
-      if (!isReg && String && String->getValue() != "")
-        Decoder = std::string(String->getValue());
-
-      RecordVal *HasCompleteDecoderVal =
-        TypeRecord->getValue("hasCompleteDecoder");
-      BitInit *HasCompleteDecoderBit = HasCompleteDecoderVal ?
-        dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) : nullptr;
-      bool HasCompleteDecoder = HasCompleteDecoderBit ?
-        HasCompleteDecoderBit->getValue() : true;
-
-      OperandInfo OpInfo(Decoder, HasCompleteDecoder);
-      OpInfo.addField(bitStart, bitWidth, 0);
-
-      NumberedInsnOperands[Name].push_back(OpInfo);
-
-      // FIXME: For complex operands with custom decoders we can't handle tied
-      // sub-operands automatically. Skip those here and assume that this is
-      // fixed up elsewhere.
-      if (CGI.Operands[SO.first].MIOperandInfo &&
-          CGI.Operands[SO.first].MIOperandInfo->getNumArgs() > 1 &&
-          String && String->getValue() != "")
-        NumberedInsnOperandsNoTie.insert(Name);
-    }
-  }
+      // At this point, we can locate the decoder field, but we need to know how
+      // to interpret it.  As a first step, require the target to provide
+      // callbacks for decoding register classes.
 
-  // For each operand, see if we can figure out where it is encoded.
-  for (const auto &Op : InOutOperands) {
-    if (!NumberedInsnOperands[std::string(Op.second)].empty()) {
-      llvm::append_range(InsnOperands,
-                         NumberedInsnOperands[std::string(Op.second)]);
-      continue;
-    }
-    if (!NumberedInsnOperands[TiedNames[std::string(Op.second)]].empty()) {
-      if (!NumberedInsnOperandsNoTie.count(TiedNames[std::string(Op.second)])) {
-        // Figure out to which (sub)operand we're tied.
-        unsigned i =
-            CGI.Operands.getOperandNamed(TiedNames[std::string(Op.second)]);
-        int tiedTo = CGI.Operands[i].getTiedRegister();
-        if (tiedTo == -1) {
-          i = CGI.Operands.getOperandNamed(Op.second);
-          tiedTo = CGI.Operands[i].getTiedRegister();
-        }
+      OperandInfo OpInfo = getOpInfo(cast<DefInit>(Op.first)->getDef());
 
-        if (tiedTo != -1) {
-          std::pair<unsigned, unsigned> SO =
-            CGI.Operands.getSubOperandNumber(tiedTo);
+      // Some bits of the operand may be required to be 1 depending on the
+      // instruction's encoding. Collect those bits.
+      if (const RecordVal *EncodedValue = EncodingDef.getValue(Op.second))
+        if (const BitsInit *OpBits =
+                dyn_cast<BitsInit>(EncodedValue->getValue()))
+          for (unsigned I = 0; I < OpBits->getNumBits(); ++I)
+            if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I)))
+              if (OpBit->getValue())
+                OpInfo.InitValue |= 1ULL << I;
 
-          InsnOperands.push_back(
-              NumberedInsnOperands[TiedNames[std::string(Op.second)]]
-                                  [SO.second]);
-        }
-      }
-      continue;
-    }
+      unsigned Base = ~0U;
+      unsigned Width = 0;
+      unsigned Offset = 0;
 
-    TypedInit *TI = cast<TypedInit>(Op.first);
-
-    // At this point, we can locate the decoder field, but we need to know how
-    // to interpret it.  As a first step, require the target to provide
-    // callbacks for decoding register classes.
-    std::string Decoder = findOperandDecoderMethod(TI);
-    Record *TypeRecord = cast<DefInit>(TI)->getDef();
-
-    RecordVal *HasCompleteDecoderVal =
-      TypeRecord->getValue("hasCompleteDecoder");
-    BitInit *HasCompleteDecoderBit = HasCompleteDecoderVal ?
-      dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) : nullptr;
-    bool HasCompleteDecoder = HasCompleteDecoderBit ?
-      HasCompleteDecoderBit->getValue() : true;
-
-    OperandInfo OpInfo(Decoder, HasCompleteDecoder);
-
-    // Some bits of the operand may be required to be 1 depending on the
-    // instruction's encoding. Collect those bits.
-    if (const RecordVal *EncodedValue = EncodingDef.getValue(Op.second))
-      if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue()))
-        for (unsigned I = 0; I < OpBits->getNumBits(); ++I)
-          if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I)))
-            if (OpBit->getValue())
-              OpInfo.InitValue |= 1ULL << I;
-
-    unsigned Base = ~0U;
-    unsigned Width = 0;
-    unsigned Offset = 0;
+      for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) {
+        VarInit *Var = nullptr;
+        VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
+        if (BI)
+          Var = dyn_cast<VarInit>(BI->getBitVar());
+        else
+          Var = dyn_cast<VarInit>(Bits.getBit(bi));
 
-    for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) {
-      VarInit *Var = nullptr;
-      VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
-      if (BI)
-        Var = dyn_cast<VarInit>(BI->getBitVar());
-      else
-        Var = dyn_cast<VarInit>(Bits.getBit(bi));
+        if (!Var) {
+          if (Base != ~0U) {
+            OpInfo.addField(Base, Width, Offset);
+            Base = ~0U;
+            Width = 0;
+            Offset = 0;
+          }
+          continue;
+        }
 
-      if (!Var) {
-        if (Base != ~0U) {
-          OpInfo.addField(Base, Width, Offset);
-          Base = ~0U;
-          Width = 0;
-          Offset = 0;
+        if ((Var->getName() != Op.second &&
+             Var->getName() != TiedNames[std::string(Op.second)])) {
+          if (Base != ~0U) {
+            OpInfo.addField(Base, Width, Offset);
+            Base = ~0U;
+            Width = 0;
+            Offset = 0;
+          }
+          continue;
         }
-        continue;
-      }
 
-      if (Var->getName() != Op.second &&
-          Var->getName() != TiedNames[std::string(Op.second)]) {
-        if (Base != ~0U) {
+        if (Base == ~0U) {
+          Base = bi;
+          Width = 1;
+          Offset = BI ? BI->getBitNum() : 0;
+        } else if (BI && BI->getBitNum() != Offset + Width) {
           OpInfo.addField(Base, Width, Offset);
-          Base = ~0U;
-          Width = 0;
-          Offset = 0;
+          Base = bi;
+          Width = 1;
+          Offset = BI->getBitNum();
+        } else {
+          ++Width;
         }
-        continue;
       }
 
-      if (Base == ~0U) {
-        Base = bi;
-        Width = 1;
-        Offset = BI ? BI->getBitNum() : 0;
-      } else if (BI && BI->getBitNum() != Offset + Width) {
+      if (Base != ~0U)
         OpInfo.addField(Base, Width, Offset);
-        Base = bi;
-        Width = 1;
-        Offset = BI->getBitNum();
-      } else {
-        ++Width;
-      }
-    }
 
-    if (Base != ~0U)
-      OpInfo.addField(Base, Width, Offset);
-
-    if (OpInfo.numFields() > 0)
-      InsnOperands.push_back(OpInfo);
+      if (OpInfo.numFields() > 0)
+        InsnOperands.push_back(OpInfo);
+    }
   }
 
   Operands[Opc] = InsnOperands;
@@ -2144,7 +2240,7 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
     });
 #endif
 
-  return true;
+  return Bits.getNumBits();
 }
 
 // emitFieldFromInstruction - Emit the templated helper function
@@ -2216,18 +2312,26 @@ static void emitInsertBits(formatted_raw_ostream &OS) {
 
 // emitDecodeInstruction - Emit the templated helper function
 // decodeInstruction().
-static void emitDecodeInstruction(formatted_raw_ostream &OS) {
+static void emitDecodeInstruction(formatted_raw_ostream &OS,
+                                  bool IsVarLenInst) {
   OS << "template <typename InsnType>\n"
      << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], "
         "MCInst &MI,\n"
      << "                                      InsnType insn, uint64_t "
         "Address,\n"
      << "                                      const MCDisassembler *DisAsm,\n"
-     << "                                      const MCSubtargetInfo &STI) {\n"
+     << "                                      const MCSubtargetInfo &STI";
+  if (IsVarLenInst) {
+    OS << ",\n"
+       << "                                      llvm::function_ref<void(APInt "
+          "&,"
+       << " uint64_t)> makeUp";
+  }
+  OS << ") {\n"
      << "  const FeatureBitset &Bits = STI.getFeatureBits();\n"
      << "\n"
      << "  const uint8_t *Ptr = DecodeTable;\n"
-     << "  InsnType CurFieldValue = 0;\n"
+     << "  uint64_t CurFieldValue = 0;\n"
      << "  DecodeStatus S = MCDisassembler::Success;\n"
      << "  while (true) {\n"
      << "    ptrdiff_t Loc = Ptr - DecodeTable;\n"
@@ -2238,8 +2342,10 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "    case MCD::OPC_ExtractField: {\n"
      << "      unsigned Start = *++Ptr;\n"
      << "      unsigned Len = *++Ptr;\n"
-     << "      ++Ptr;\n"
-     << "      CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
+     << "      ++Ptr;\n";
+  if (IsVarLenInst)
+    OS << "      makeUp(insn, Start + Len);\n";
+  OS << "      CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
      << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << "
         "\", \"\n"
      << "                   << Len << \"): \" << CurFieldValue << \"\\n\");\n"
@@ -2248,7 +2354,7 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "    case MCD::OPC_FilterValue: {\n"
      << "      // Decode the field value.\n"
      << "      unsigned Len;\n"
-     << "      InsnType Val = decodeULEB128(++Ptr, &Len);\n"
+     << "      uint64_t Val = decodeULEB128(++Ptr, &Len);\n"
      << "      Ptr += Len;\n"
      << "      // NumToSkip is a plain 24-bit integer.\n"
      << "      unsigned NumToSkip = *Ptr++;\n"
@@ -2269,11 +2375,14 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "    }\n"
      << "    case MCD::OPC_CheckField: {\n"
      << "      unsigned Start = *++Ptr;\n"
-     << "      unsigned Len = *++Ptr;\n"
-     << "      InsnType FieldValue = fieldFromInstruction(insn, Start, Len);\n"
+     << "      unsigned Len = *++Ptr;\n";
+  if (IsVarLenInst)
+    OS << "      makeUp(insn, Start + Len);\n";
+  OS << "      uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);\n"
      << "      // Decode the field value.\n"
-     << "      InsnType ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
-     << "      Ptr += Len;\n"
+     << "      unsigned PtrLen = 0;\n"
+     << "      uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);\n"
+     << "      Ptr += PtrLen;\n"
      << "      // NumToSkip is a plain 24-bit integer.\n"
      << "      unsigned NumToSkip = *Ptr++;\n"
      << "      NumToSkip |= (*Ptr++) << 8;\n"
@@ -2323,8 +2432,12 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "\n"
      << "      MI.clear();\n"
      << "      MI.setOpcode(Opc);\n"
-     << "      bool DecodeComplete;\n"
-     << "      S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, "
+     << "      bool DecodeComplete;\n";
+  if (IsVarLenInst) {
+    OS << "      Len = InstrLenTable[Opc];\n"
+       << "      makeUp(insn, Len);\n";
+  }
+  OS << "      S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, "
         "DecodeComplete);\n"
      << "      assert(DecodeComplete);\n"
      << "\n"
@@ -2378,11 +2491,12 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "    case MCD::OPC_SoftFail: {\n"
      << "      // Decode the mask values.\n"
      << "      unsigned Len;\n"
-     << "      InsnType PositiveMask = decodeULEB128(++Ptr, &Len);\n"
+     << "      uint64_t PositiveMask = decodeULEB128(++Ptr, &Len);\n"
      << "      Ptr += Len;\n"
-     << "      InsnType NegativeMask = decodeULEB128(Ptr, &Len);\n"
+     << "      uint64_t NegativeMask = decodeULEB128(Ptr, &Len);\n"
      << "      Ptr += Len;\n"
-     << "      bool Fail = (insn & PositiveMask) || (~insn & NegativeMask);\n"
+     << "      bool Fail = (insn & PositiveMask) != 0 || (~insn & "
+        "NegativeMask) != 0;\n"
      << "      if (Fail)\n"
      << "        S = MCDisassembler::SoftFail;\n"
      << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? "
@@ -2401,7 +2515,7 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
 }
 
 // Emits disassembler code for instruction decoding.
-void FixedLenDecoderEmitter::run(raw_ostream &o) {
+void DecoderEmitter::run(raw_ostream &o) {
   formatted_raw_ostream OS(o);
   OS << "#include \"llvm/MC/MCInst.h\"\n";
   OS << "#include \"llvm/MC/MCSubtargetInfo.h\"\n";
@@ -2473,6 +2587,14 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
   std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
       OpcMap;
   std::map<unsigned, std::vector<OperandInfo>> Operands;
+  std::vector<unsigned> InstrLen;
+
+  bool IsVarLenInst =
+      any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
+        RecordVal *RV = CGI->TheDef->getValue("Inst");
+        return RV && isa<DagInit>(RV->getValue());
+      });
+  unsigned MaxInstLen = 0;
 
   for (unsigned i = 0; i < NumberedEncodings.size(); ++i) {
     const Record *EncodingDef = NumberedEncodings[i].EncodingDef;
@@ -2491,10 +2613,18 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
       NumInstructions++;
     NumEncodings++;
 
-    if (!Size)
+    if (!Size && !IsVarLenInst)
       continue;
 
-    if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) {
+    if (IsVarLenInst)
+      InstrLen.resize(NumberedInstructions.size(), 0);
+
+    if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, i,
+                                           Operands, IsVarLenInst)) {
+      if (IsVarLenInst) {
+        MaxInstLen = std::max(MaxInstLen, Len);
+        InstrLen[i] = Len;
+      }
       std::string DecoderNamespace =
           std::string(EncodingDef->getValueAsString("DecoderNamespace"));
       if (!NumberedEncodings[i].HwModeName.empty())
@@ -2513,7 +2643,7 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
     ArrayRef<EncodingAndInst> NumberedEncodingsRef(
         NumberedEncodings.data(), NumberedEncodings.size());
     FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
-                     8 * Opc.first.second, this);
+                     IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this);
 
     // The decode table is cleared for each top level decoder function. The
     // predicates and decoders themselves, however, are shared across all
@@ -2538,6 +2668,11 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
     OS.flush();
   }
 
+  // For variable instruction, we emit a instruction length table
+  // to let the decoder know how long the instructions are.
+  // You can see example usage in M68k's disassembler.
+  if (IsVarLenInst)
+    emitInstrLenTable(OS, InstrLen);
   // Emit the predicate function.
   emitPredicateFunction(OS, TableInfo.Predicates, 0);
 
@@ -2545,20 +2680,20 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
   emitDecoderFunction(OS, TableInfo.Decoders, 0);
 
   // Emit the main entry point for the decoder, decodeInstruction().
-  emitDecodeInstruction(OS);
+  emitDecodeInstruction(OS, IsVarLenInst);
 
   OS << "\n} // end namespace llvm\n";
 }
 
 namespace llvm {
 
-void EmitFixedLenDecoder(RecordKeeper &RK, raw_ostream &OS,
-                         const std::string &PredicateNamespace,
-                         const std::string &GPrefix,
-                         const std::string &GPostfix, const std::string &ROK,
-                         const std::string &RFail, const std::string &L) {
-  FixedLenDecoderEmitter(RK, PredicateNamespace, GPrefix, GPostfix,
-                         ROK, RFail, L).run(OS);
+void EmitDecoder(RecordKeeper &RK, raw_ostream &OS,
+                 const std::string &PredicateNamespace,
+                 const std::string &GPrefix, const std::string &GPostfix,
+                 const std::string &ROK, const std::string &RFail,
+                 const std::string &L) {
+  DecoderEmitter(RK, PredicateNamespace, GPrefix, GPostfix, ROK, RFail, L)
+      .run(OS);
 }
 
 } // end namespace llvm
diff --git a/llvm/utils/TableGen/DisassemblerEmitter.cpp b/llvm/utils/TableGen/DisassemblerEmitter.cpp
index 7c3f53b..297d12c 100644
--- a/llvm/utils/TableGen/DisassemblerEmitter.cpp
+++ b/llvm/utils/TableGen/DisassemblerEmitter.cpp
@@ -95,12 +95,11 @@ using namespace llvm::X86Disassembler;
 
 namespace llvm {
 
-extern void EmitFixedLenDecoder(RecordKeeper &RK, raw_ostream &OS,
-                                const std::string &PredicateNamespace,
-                                const std::string &GPrefix,
-                                const std::string &GPostfix,
-                                const std::string &ROK,
-                                const std::string &RFail, const std::string &L);
+extern void EmitDecoder(RecordKeeper &RK, raw_ostream &OS,
+                        const std::string &PredicateNamespace,
+                        const std::string &GPrefix, const std::string &GPostfix,
+                        const std::string &ROK, const std::string &RFail,
+                        const std::string &L);
 
 void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
   CodeGenTarget Target(Records);
@@ -140,17 +139,16 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
     if (PredicateNamespace == "Thumb")
       PredicateNamespace = "ARM";
 
-    EmitFixedLenDecoder(Records, OS, PredicateNamespace,
-                        "if (!Check(S, ", "))",
-                        "S", "MCDisassembler::Fail",
-                        "  MCDisassembler::DecodeStatus S = "
-                          "MCDisassembler::Success;\n(void)S;");
+    EmitDecoder(Records, OS, PredicateNamespace, "if (!Check(S, ", "))", "S",
+                "MCDisassembler::Fail",
+                "  MCDisassembler::DecodeStatus S = "
+                "MCDisassembler::Success;\n(void)S;");
     return;
   }
 
-  EmitFixedLenDecoder(Records, OS, std::string(Target.getName()), "if (",
-                      " == MCDisassembler::Fail)", "MCDisassembler::Success",
-                      "MCDisassembler::Fail", "");
+  EmitDecoder(Records, OS, std::string(Target.getName()), "if (",
+              " == MCDisassembler::Fail)", "MCDisassembler::Success",
+              "MCDisassembler::Fail", "");
 }
 
 } // end namespace llvm
diff --git a/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp b/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
index 128a680..ce93725 100644
--- a/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
@@ -58,7 +58,6 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
 
 using namespace llvm;
 
@@ -67,48 +66,6 @@ namespace {
 class VarLenCodeEmitterGen {
   RecordKeeper &Records;
 
-  struct EncodingSegment {
-    unsigned BitWidth;
-    const Init *Value;
-    StringRef CustomEncoder = "";
-  };
-
-  class VarLenInst {
-    RecordVal *TheDef;
-    size_t NumBits;
-
-    // Set if any of the segment is not fixed value.
-    bool HasDynamicSegment;
-
-    SmallVector<EncodingSegment, 4> Segments;
-
-    void buildRec(const DagInit *DI);
-
-    StringRef getCustomEncoderName(const Init *EI) const {
-      if (const auto *DI = dyn_cast<DagInit>(EI)) {
-        if (DI->getNumArgs() && isa<StringInit>(DI->getArg(0)))
-          return cast<StringInit>(DI->getArg(0))->getValue();
-      }
-      return "";
-    }
-
-  public:
-    VarLenInst() : TheDef(nullptr), NumBits(0U), HasDynamicSegment(false) {}
-
-    explicit VarLenInst(const DagInit *DI, RecordVal *TheDef);
-
-    /// Number of bits
-    size_t size() const { return NumBits; }
-
-    using const_iterator = decltype(Segments)::const_iterator;
-
-    const_iterator begin() const { return Segments.begin(); }
-    const_iterator end() const { return Segments.end(); }
-    size_t getNumSegments() const { return Segments.size(); }
-
-    bool isFixedValueOnly() const { return !HasDynamicSegment; }
-  };
-
   DenseMap<Record *, VarLenInst> VarLenInsts;
 
   // Emit based values (i.e. fixed bits in the encoded instructions)
@@ -129,15 +86,14 @@ public:
 
 } // end anonymous namespace
 
-VarLenCodeEmitterGen::VarLenInst::VarLenInst(const DagInit *DI,
-                                             RecordVal *TheDef)
+VarLenInst::VarLenInst(const DagInit *DI, const RecordVal *TheDef)
     : TheDef(TheDef), NumBits(0U) {
   buildRec(DI);
   for (const auto &S : Segments)
     NumBits += S.BitWidth;
 }
 
-void VarLenCodeEmitterGen::VarLenInst::buildRec(const DagInit *DI) {
+void VarLenInst::buildRec(const DagInit *DI) {
   assert(TheDef && "The def record is nullptr ?");
 
   std::string Op = DI->getOperator()->getAsString();
diff --git a/llvm/utils/TableGen/VarLenCodeEmitterGen.h b/llvm/utils/TableGen/VarLenCodeEmitterGen.h
index 330b791b..5bdedee 100644
--- a/llvm/utils/TableGen/VarLenCodeEmitterGen.h
+++ b/llvm/utils/TableGen/VarLenCodeEmitterGen.h
@@ -14,10 +14,51 @@
 #ifndef LLVM_UTILS_TABLEGEN_VARLENCODEEMITTERGEN_H
 #define LLVM_UTILS_TABLEGEN_VARLENCODEEMITTERGEN_H
 
+#include "llvm/TableGen/Record.h"
+
 namespace llvm {
 
-class RecordKeeper;
-class raw_ostream;
+struct EncodingSegment {
+  unsigned BitWidth;
+  const Init *Value;
+  StringRef CustomEncoder = "";
+};
+
+class VarLenInst {
+  const RecordVal *TheDef;
+  size_t NumBits;
+
+  // Set if any of the segment is not fixed value.
+  bool HasDynamicSegment;
+
+  SmallVector<EncodingSegment, 4> Segments;
+
+  void buildRec(const DagInit *DI);
+
+  StringRef getCustomEncoderName(const Init *EI) const {
+    if (const auto *DI = dyn_cast<DagInit>(EI)) {
+      if (DI->getNumArgs() && isa<StringInit>(DI->getArg(0)))
+        return cast<StringInit>(DI->getArg(0))->getValue();
+    }
+    return "";
+  }
+
+public:
+  VarLenInst() : TheDef(nullptr), NumBits(0U), HasDynamicSegment(false) {}
+
+  explicit VarLenInst(const DagInit *DI, const RecordVal *TheDef);
+
+  /// Number of bits
+  size_t size() const { return NumBits; }
+
+  using const_iterator = decltype(Segments)::const_iterator;
+
+  const_iterator begin() const { return Segments.begin(); }
+  const_iterator end() const { return Segments.end(); }
+  size_t getNumSegments() const { return Segments.size(); }
+
+  bool isFixedValueOnly() const { return !HasDynamicSegment; }
+};
 
 void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS);
 
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn
index d595bef..f953440 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn
@@ -433,6 +433,14 @@ static_library("builtins") {
     }
   }
 
+  if (current_cpu == "avr") {
+    sources += [
+      "avr/exit.S",
+      "avr/mulhi3.S",
+      "avr/mulqi3.S",
+    ]
+  }
+
   if (current_cpu == "hexagon") {
     sources += [
       "hexagon/common_entry_exit_abi1.S",
diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/MacOSX/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/MacOSX/BUILD.gn
index f4a3a0d..553ec5e 100644
--- a/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/MacOSX/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/MacOSX/BUILD.gn
@@ -34,6 +34,7 @@ static_library("MacOSX") {
   include_dirs = [ "//lldb/source" ]
   sources = [
     "PlatformDarwin.cpp",
+    "PlatformDarwinDevice.cpp",
     "PlatformDarwinKernel.cpp",
     "PlatformMacOSX.cpp",
     "PlatformRemoteAppleBridge.cpp",
diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Linux/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Linux/BUILD.gn
index 43c8005..4d78791 100644
--- a/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Linux/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Linux/BUILD.gn
@@ -25,6 +25,7 @@ static_library("Linux") {
     "NativeRegisterContextLinux_x86_64.cpp",
     "NativeThreadLinux.cpp",
     "Perf.cpp",
+    "Procfs.cpp",
     "SingleStepCheck.cpp",
   ]
 }
diff --git a/llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn
index 27616c9..3026f88 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn
@@ -19,6 +19,7 @@ static_library("Object") {
     "COFFImportFile.cpp",
     "COFFModuleDefinition.cpp",
     "COFFObjectFile.cpp",
+    "DXContainer.cpp",
     "Decompressor.cpp",
     "ELF.cpp",
     "ELFObjectFile.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn
index fd2aea7f..1a73076 100644
--- a/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn
@@ -24,6 +24,7 @@ executable("llvm-reduce") {
     "deltas/ReduceGlobalValues.cpp",
     "deltas/ReduceGlobalVarInitializers.cpp",
     "deltas/ReduceGlobalVars.cpp",
+    "deltas/ReduceIRReferences.cpp",
     "deltas/ReduceInstructions.cpp",
     "deltas/ReduceInstructionsMIR.cpp",
     "deltas/ReduceMetadata.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn
index 161ad60..1c4a32e 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn
@@ -9,6 +9,7 @@ unittest("ObjectTests") {
   ]
   sources = [
     "ArchiveTest.cpp",
+    "DXContainerTest.cpp",
     "ELFObjectFileTest.cpp",
     "ELFTest.cpp",
     "ELFTypesTest.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn
index 034fbe3..412557d 100644
--- a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn
@@ -27,13 +27,13 @@ executable("llvm-tblgen") {
     "DAGISelMatcherEmitter.cpp",
     "DAGISelMatcherGen.cpp",
     "DAGISelMatcherOpt.cpp",
+    "DecoderEmitter.cpp",
     "DFAEmitter.cpp",
     "DFAPacketizerEmitter.cpp",
     "DirectiveEmitter.cpp",
     "DisassemblerEmitter.cpp",
     "ExegesisEmitter.cpp",
     "FastISelEmitter.cpp",
-    "FixedLenDecoderEmitter.cpp",
     "GICombinerEmitter.cpp",
     "GlobalISelEmitter.cpp",
     "InfoByHwMode.cpp",
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
index 4395383..18e65c8 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
@@ -999,18 +999,17 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
       /*desc=*/[{
         Returns the statically-known loop ranges. Composes
         `getShapesToLoopsMap()` with the result of `getStaticShape`.
-        Returns None if `getShapesToLoopsMap()` fails. Returns
-        ShapeType::kDynamicSize for non-statically-known loop ranges.
+        Returns ShapeType::kDynamicSize for non-statically-known loop ranges.
+        This is expected to be called by a valid Linalg op
       }],
-      /*retTy=*/"Optional<SmallVector<int64_t, 4>>",
+      /*retTy=*/"SmallVector<int64_t, 4>",
       /*methodName=*/"getStaticLoopRanges",
       /*args=*/(ins),
       /*methodBody=*/"",
       /*defaultImplementation=*/[{
         SmallVector<int64_t> viewSizes = getStaticShape();
         AffineMap invertedMap = getShapesToLoopsMap();
-        if (!invertedMap)
-          return {};
+        assert(invertedMap && "expected a valid Linalg op to call the method");
         return invertedMap.compose(viewSizes);
       }]
     >,
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 39ccb70..e69891e 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -99,8 +99,8 @@ def ParallelOp : OpenMP_Op<"parallel", [
     of the parallel region.
   }];
 
-  let arguments = (ins Optional<AnyType>:$if_expr_var,
-             Optional<AnyType>:$num_threads_var,
+  let arguments = (ins Optional<I1>:$if_expr_var,
+             Optional<IntLikeType>:$num_threads_var,
              Variadic<AnyType>:$allocate_vars,
              Variadic<AnyType>:$allocators_vars,
              Variadic<OpenMP_PointerLikeType>:$reduction_vars,
@@ -999,6 +999,40 @@ def ThreadprivateOp : OpenMP_Op<"threadprivate"> {
 }
 
 //===----------------------------------------------------------------------===//
+// 2.18.1 Cancel Construct
+//===----------------------------------------------------------------------===//
+def CancelOp : OpenMP_Op<"cancel"> {
+  let summary = "cancel directive";
+  let description = [{
+    The cancel construct activates cancellation of the innermost enclosing
+    region of the type specified.
+  }];
+  let arguments = (ins CancellationConstructTypeAttr:$cancellation_construct_type_val,
+                       Optional<I1>:$if_expr);
+  let assemblyFormat = [{ `cancellation_construct_type` `(`
+                          custom<ClauseAttr>($cancellation_construct_type_val) `)`
+                          ( `if` `(` $if_expr^ `)` )? attr-dict}];
+  let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// 2.18.2 Cancellation Point Construct
+//===----------------------------------------------------------------------===//
+def CancellationPointOp : OpenMP_Op<"cancellationpoint"> {
+  let summary = "cancellation point directive";
+  let description = [{
+    The cancellation point construct introduces a user-defined cancellation
+    point at which implicit or explicit tasks check if cancellation of the
+    innermost enclosing region of the type specified has been activated.
+  }];
+  let arguments = (ins CancellationConstructTypeAttr:$cancellation_construct_type_val);
+  let assemblyFormat = [{ `cancellation_construct_type` `(`
+                           custom<ClauseAttr>($cancellation_construct_type_val) `)`
+                           attr-dict}];
+  let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
 // 2.19.5.7 declare reduction Directive
 //===----------------------------------------------------------------------===//
 
diff --git a/mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterpOps.td b/mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterpOps.td
index fbc73c0..44dc6cd 100644
--- a/mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterpOps.td
+++ b/mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterpOps.td
@@ -409,14 +409,18 @@ def PDLInterp_CreateOperationOp
   let description = [{
     `pdl_interp.create_operation` operations create an `Operation` instance with
     the specified attributes, operands, and result types. See `pdl.operation`
-    for a more detailed description on the interpretation of the arguments to
-    this operation.
+    for a more detailed description on the general interpretation of the arguments
+    to this operation.
 
     Example:
 
     ```mlir
     // Create an instance of a `foo.op` operation.
     %op = pdl_interp.create_operation "foo.op"(%arg0 : !pdl.value) {"attrA" = %attr0} -> (%type : !pdl.type)
+    
+    // Create an instance of a `foo.op` operation that has inferred result types
+    // (using the InferTypeOpInterface).
+    %op = pdl_interp.create_operation "foo.op"(%arg0 : !pdl.value) {"attrA" = %attr0} -> <inferred>
     ```
   }];
 
@@ -424,22 +428,26 @@ def PDLInterp_CreateOperationOp
                        Variadic<PDL_InstOrRangeOf<PDL_Value>>:$inputOperands,
                        Variadic<PDL_Attribute>:$inputAttributes,
                        StrArrayAttr:$inputAttributeNames,
-                       Variadic<PDL_InstOrRangeOf<PDL_Type>>:$inputResultTypes);
+                       Variadic<PDL_InstOrRangeOf<PDL_Type>>:$inputResultTypes,
+                       UnitAttr:$inferredResultTypes);
   let results = (outs PDL_Operation:$resultOp);
 
   let builders = [
     OpBuilder<(ins "StringRef":$name, "ValueRange":$types,
-      "ValueRange":$operands, "ValueRange":$attributes,
-      "ArrayAttr":$attributeNames), [{
+      "bool":$inferredResultTypes, "ValueRange":$operands,
+      "ValueRange":$attributes, "ArrayAttr":$attributeNames), [{
       build($_builder, $_state, $_builder.getType<pdl::OperationType>(), name,
-            operands, attributes, attributeNames, types);
+            operands, attributes, attributeNames, types, inferredResultTypes);
     }]>
   ];
   let assemblyFormat = [{
-    $name (`(` $inputOperands^ `:` type($inputOperands) `)`)?
+    $name (`(` $inputOperands^ `:` type($inputOperands) `)`)? ``
     custom<CreateOperationOpAttributes>($inputAttributes, $inputAttributeNames)
-    (`->` `(` $inputResultTypes^ `:` type($inputResultTypes) `)`)? attr-dict
+    custom<CreateOperationOpResults>($inputResultTypes, type($inputResultTypes),
+                                     $inferredResultTypes)
+    attr-dict
   }];
+  let hasVerifier = 1;
 }
 
 //===----------------------------------------------------------------------===//
@@ -962,33 +970,6 @@ def PDLInterp_GetValueTypeOp : PDLInterp_Op<"get_value_type", [NoSideEffect,
 }
 
 //===----------------------------------------------------------------------===//
-// pdl_interp::InferredTypesOp
-//===----------------------------------------------------------------------===//
-
-def PDLInterp_InferredTypesOp : PDLInterp_Op<"inferred_types"> {
-  let summary = "Generate a handle to a range of Types that are \"inferred\"";
-  let description = [{
-    `pdl_interp.inferred_types` operations generate handles to ranges of types
-    that should be inferred. This signals to other operations, such as
-    `pdl_interp.create_operation`, that these types should be inferred.
-
-    Example:
-
-    ```mlir
-    %types = pdl_interp.inferred_types
-    ```
-  }];
-  let results = (outs PDL_RangeOf<PDL_Type>:$result);
-  let assemblyFormat = "attr-dict";
-  let builders = [
-    OpBuilder<(ins), [{
-      build($_builder, $_state,
-            pdl::RangeType::get($_builder.getType<pdl::TypeType>()));
-    }]>
-  ];
-}
-
-//===----------------------------------------------------------------------===//
 // pdl_interp::IsNotNullOp
 //===----------------------------------------------------------------------===//
 
diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.h b/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.h
index b4c8ada..d029c21 100644
--- a/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.h
+++ b/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.h
@@ -74,6 +74,10 @@ public:
   /// This is helpful for transformations that apply to a particular handle.
   ArrayRef<Operation *> getPayloadOps(Value value) const;
 
+  /// Returns the Transform IR handle for the given Payload IR op if it exists
+  /// in the state, null otherwise.
+  Value getHandleForPayloadOp(Operation *op) const;
+
   /// Applies the transformation specified by the given transform op and updates
   /// the state accordingly.
   LogicalResult applyTransform(TransformOpInterface transform);
@@ -185,6 +189,10 @@ public:
     /// Provides read-only access to the parent TransformState object.
     const TransformState &getTransformState() const { return state; }
 
+    /// Replaces the given payload op with another op. If the replacement op is
+    /// null, removes the association of the payload op with its handle.
+    LogicalResult replacePayloadOp(Operation *op, Operation *replacement);
+
   private:
     /// Back-reference to the state that is being extended.
     TransformState &state;
@@ -276,9 +284,17 @@ private:
   /// The callback function is called once per associated operation and is
   /// expected to return the modified operation or nullptr. In the latter case,
   /// the corresponding operation is no longer associated with the transform IR
-  /// value.
-  void updatePayloadOps(Value value,
-                        function_ref<Operation *(Operation *)> callback);
+  /// value. May fail if the operation produced by the update callback is
+  /// already associated with a different Transform IR handle value.
+  LogicalResult
+  updatePayloadOps(Value value,
+                   function_ref<Operation *(Operation *)> callback);
+
+  /// Attempts to record the mapping between the given Payload IR operation and
+  /// the given Transform IR handle. Fails and reports an error if the operation
+  /// is already tracked by another handle.
+  static LogicalResult tryEmplaceReverseMapping(Mappings &map, Operation *op,
+                                                Value handle);
 
   /// The mappings between transform IR values and payload IR ops, aggregated by
   /// the region in which the transform IR values are defined.
diff --git a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
index 44eab50..e536ae8 100644
--- a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
+++ b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
@@ -312,7 +312,7 @@ public:
   explicit DynamicMemRefType(const StridedMemRefType<T, N> &memRef)
       : rank(N), basePtr(memRef.basePtr), data(memRef.data),
         offset(memRef.offset), sizes(memRef.sizes), strides(memRef.strides) {}
-  explicit DynamicMemRefType(const UnrankedMemRefType<T> &memRef)
+  explicit DynamicMemRefType(const ::UnrankedMemRefType<T> &memRef)
       : rank(memRef.rank) {
     auto *desc = static_cast<StridedMemRefType<T, 1> *>(memRef.descriptor);
     basePtr = desc->basePtr;
@@ -334,8 +334,8 @@ public:
 // Small runtime support library for memref.copy lowering during codegen.
 //===----------------------------------------------------------------------===//
 extern "C" MLIR_CRUNNERUTILS_EXPORT void
-memrefCopy(int64_t elemSize, UnrankedMemRefType<char> *src,
-           UnrankedMemRefType<char> *dst);
+memrefCopy(int64_t elemSize, ::UnrankedMemRefType<char> *src,
+           ::UnrankedMemRefType<char> *dst);
 
 //===----------------------------------------------------------------------===//
 // Small runtime support library for vector.print lowering during codegen.
diff --git a/mlir/include/mlir/IR/Operation.h b/mlir/include/mlir/IR/Operation.h
index 4692104..4594cb6 100644
--- a/mlir/include/mlir/IR/Operation.h
+++ b/mlir/include/mlir/IR/Operation.h
@@ -811,6 +811,11 @@ template <typename T> struct isa_impl<T, ::mlir::Operation> {
   }
 };
 
+/// Allow isa<Operation *> on operations.
+template <> struct isa_impl<::mlir::Operation *, ::mlir::Operation> {
+  static inline bool doit(const ::mlir::Operation &op) { return true; }
+};
+
 /// Provide specializations for operation casts as the resulting T is value
 /// typed.
 template <typename T> struct cast_retty_impl<T, ::mlir::Operation *> {
diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp
index 6057a21..c24620a 100644
--- a/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp
+++ b/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp
@@ -100,11 +100,12 @@ private:
                         function_ref<Value(Value)> mapRewriteValue);
 
   /// Generate the values used for resolving the result types of an operation
-  /// created within a dag rewriter region.
+  /// created within a dag rewriter region. If the result types of the operation
+  /// should be inferred, `hasInferredResultTypes` is set to true.
   void generateOperationResultTypeRewriter(
-      pdl::OperationOp op, SmallVectorImpl<Value> &types,
-      DenseMap<Value, Value> &rewriteValues,
-      function_ref<Value(Value)> mapRewriteValue);
+      pdl::OperationOp op, function_ref<Value(Value)> mapRewriteValue,
+      SmallVectorImpl<Value> &types, DenseMap<Value, Value> &rewriteValues,
+      bool &hasInferredResultTypes);
 
   /// A builder to use when generating interpreter operations.
   OpBuilder builder;
@@ -707,15 +708,16 @@ void PatternLowering::generateRewriter(
   for (Value attr : operationOp.attributes())
     attributes.push_back(mapRewriteValue(attr));
 
+  bool hasInferredResultTypes = false;
   SmallVector<Value, 2> types;
-  generateOperationResultTypeRewriter(operationOp, types, rewriteValues,
-                                      mapRewriteValue);
+  generateOperationResultTypeRewriter(operationOp, mapRewriteValue, types,
+                                      rewriteValues, hasInferredResultTypes);
 
   // Create the new operation.
   Location loc = operationOp.getLoc();
   Value createdOp = builder.create<pdl_interp::CreateOperationOp>(
-      loc, *operationOp.name(), types, operands, attributes,
-      operationOp.attributeNames());
+      loc, *operationOp.name(), types, hasInferredResultTypes, operands,
+      attributes, operationOp.attributeNames());
   rewriteValues[operationOp.op()] = createdOp;
 
   // Generate accesses for any results that have their types constrained.
@@ -825,9 +827,9 @@ void PatternLowering::generateRewriter(
 }
 
 void PatternLowering::generateOperationResultTypeRewriter(
-    pdl::OperationOp op, SmallVectorImpl<Value> &types,
-    DenseMap<Value, Value> &rewriteValues,
-    function_ref<Value(Value)> mapRewriteValue) {
+    pdl::OperationOp op, function_ref<Value(Value)> mapRewriteValue,
+    SmallVectorImpl<Value> &types, DenseMap<Value, Value> &rewriteValues,
+    bool &hasInferredResultTypes) {
   // Look for an operation that was replaced by `op`. The result types will be
   // inferred from the results that were replaced.
   Block *rewriterBlock = op->getBlock();
@@ -851,36 +853,54 @@ void PatternLowering::generateOperationResultTypeRewriter(
     return;
   }
 
-  // Check if the operation has type inference support.
-  if (op.hasTypeInference()) {
-    types.push_back(builder.create<pdl_interp::InferredTypesOp>(op.getLoc()));
-    return;
-  }
-
-  // Otherwise, handle inference for each of the result types individually.
+  // Try to handle resolution for each of the result types individually. This is
+  // preferred over type inferrence because it will allow for us to use existing
+  // types directly, as opposed to trying to rebuild the type list.
   OperandRange resultTypeValues = op.types();
-  types.reserve(resultTypeValues.size());
-  for (const auto &it : llvm::enumerate(resultTypeValues)) {
-    Value resultType = it.value();
+  auto tryResolveResultTypes = [&] {
+    types.reserve(resultTypeValues.size());
+    for (const auto &it : llvm::enumerate(resultTypeValues)) {
+      Value resultType = it.value();
+
+      // Check for an already translated value.
+      if (Value existingRewriteValue = rewriteValues.lookup(resultType)) {
+        types.push_back(existingRewriteValue);
+        continue;
+      }
 
-    // Check for an already translated value.
-    if (Value existingRewriteValue = rewriteValues.lookup(resultType)) {
-      types.push_back(existingRewriteValue);
-      continue;
-    }
+      // Check for an input from the matcher.
+      if (resultType.getDefiningOp()->getBlock() != rewriterBlock) {
+        types.push_back(mapRewriteValue(resultType));
+        continue;
+      }
 
-    // Check for an input from the matcher.
-    if (resultType.getDefiningOp()->getBlock() != rewriterBlock) {
-      types.push_back(mapRewriteValue(resultType));
-      continue;
+      // Otherwise, we couldn't infer the result types. Bail out here to see if
+      // we can infer the types for this operation from another way.
+      types.clear();
+      return failure();
     }
+    return success();
+  };
+  if (!resultTypeValues.empty() && succeeded(tryResolveResultTypes()))
+    return;
 
-    // The verifier asserts that the result types of each pdl.operation can be
-    // inferred. If we reach here, there is a bug either in the logic above or
-    // in the verifier for pdl.operation.
-    op->emitOpError() << "unable to infer result type for operation";
-    llvm_unreachable("unable to infer result type for operation");
+  // Otherwise, check if the operation has type inference support itself.
+  if (op.hasTypeInference()) {
+    hasInferredResultTypes = true;
+    return;
   }
+
+  // If the types could not be inferred from any context and there weren't any
+  // explicit result types, assume the user actually meant for the operation to
+  // have no results.
+  if (resultTypeValues.empty())
+    return;
+
+  // The verifier asserts that the result types of each pdl.operation can be
+  // inferred. If we reach here, there is a bug either in the logic above or
+  // in the verifier for pdl.operation.
+  op->emitOpError() << "unable to infer result type for operation";
+  llvm_unreachable("unable to infer result type for operation");
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
index 6d015f5..ad6af0f 100644
--- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
@@ -622,6 +622,10 @@ LogicalResult mlir::normalizeAffineFor(AffineForOp op) {
                      newUbExprs, opBuilder.getContext());
   canonicalizeMapAndOperands(&newUbMap, &ubOperands);
 
+  SmallVector<Value, 4> lbOperands(lb.getOperands().begin(),
+                                   lb.getOperands().begin() +
+                                       lb.getMap().getNumDims());
+
   // Normalize the loop.
   op.setUpperBound(ubOperands, newUbMap);
   op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
@@ -630,9 +634,6 @@ LogicalResult mlir::normalizeAffineFor(AffineForOp op) {
   // Calculate the Value of new loopIV. Create affine.apply for the value of
   // the loopIV in normalized loop.
   opBuilder.setInsertionPointToStart(op.getBody());
-  SmallVector<Value, 4> lbOperands(lb.getOperands().begin(),
-                                   lb.getOperands().begin() +
-                                       lb.getMap().getNumDims());
   // Add an extra dim operand for loopIV.
   lbOperands.push_back(op.getInductionVar());
   // Add symbol operands from lower bound.
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
index 4c79672..c916d15 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
@@ -732,23 +732,20 @@ LogicalResult mlir::linalg::detail::verifyStructuredOpInterface(Operation *op) {
   }
 
   // Check if given shapes match to inferred shapes.
-  Optional<SmallVector<int64_t, 4>> endLoopRangeValues =
-      linalgOp.getStaticLoopRanges();
-  if (!endLoopRangeValues)
-    return op->emitOpError("unable to find loop range for operation");
-  SmallVector<int64_t, 4> startLoopRangeValues((*endLoopRangeValues).size(), 0);
+  SmallVector<int64_t, 4> endLoopRangeValues = linalgOp.getStaticLoopRanges();
+  SmallVector<int64_t, 4> startLoopRangeValues(endLoopRangeValues.size(), 0);
 
   // Verify only static cases since we can't get exact dimension sizes and loop
   // ranges for dynamic cases in this stage.
-  if (llvm::none_of(*endLoopRangeValues, ShapedType::isDynamic)) {
-    for (int64_t &range : *endLoopRangeValues)
+  if (llvm::none_of(endLoopRangeValues, ShapedType::isDynamic)) {
+    for (int64_t &range : endLoopRangeValues)
       range -= 1;
     for (OpOperand *opOperand : linalgOp.getInputAndOutputOperands()) {
       AffineMap indexingMap = linalgOp.getTiedIndexingMap(opOperand);
       SmallVector<int64_t, 4> startIndices =
           indexingMap.compose(startLoopRangeValues);
       SmallVector<int64_t, 4> endIndices =
-          indexingMap.compose(*endLoopRangeValues);
+          indexingMap.compose(endLoopRangeValues);
       ArrayRef<int64_t> shape = linalgOp.getShape(opOperand);
       for (auto dim : llvm::seq<int64_t>(0, shape.size())) {
         // Ignore dynamic dimension or the case that the dimension size is 0
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
index cc0ec08..a24fdbe3 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
@@ -518,12 +518,8 @@ LogicalResult ExpansionInfo::compute(LinalgOp linalgOp,
     return failure();
   AffineMap fusedIndexMap = linalgOp.getTiedIndexingMap(fusableOpOperand);
 
-  Optional<SmallVector<int64_t, 4>> originalLoopRange =
-      linalgOp.getStaticLoopRanges();
-  if (!originalLoopRange)
-    return rewriter.notifyMatchFailure(linalgOp, "unable to find loop range");
-  originalLoopExtent.assign(originalLoopRange->begin(),
-                            originalLoopRange->end());
+  SmallVector<int64_t, 4> originalLoopRange = linalgOp.getStaticLoopRanges();
+  originalLoopExtent.assign(originalLoopRange.begin(), originalLoopRange.end());
 
   reassociation.clear();
   expandedShapeMap.clear();
diff --git a/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp b/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp
index 8f94de4..da3c520 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp
@@ -73,12 +73,10 @@ FailureOr<LinalgOp> mlir::linalg::splitReduction(
   op.getReductionDims(dims);
   assert(dims.size() == 1);
   unsigned reductionDim = dims[0];
-  Optional<SmallVector<int64_t, 4>> loopRanges = op.getStaticLoopRanges();
-  if (!loopRanges)
-    return b.notifyMatchFailure(op, "Cannot analyze loops");
-  int64_t reductionDimSize = (*loopRanges)[reductionDim];
+  SmallVector<int64_t, 4> loopRanges = op.getStaticLoopRanges();
+  int64_t reductionDimSize = loopRanges[reductionDim];
   if (reductionDimSize == ShapedType::kDynamicSize ||
-      reductionDimSize % ratio != 0 || insertDimIndex >= loopRanges->size())
+      reductionDimSize % ratio != 0 || insertDimIndex >= loopRanges.size())
     return b.notifyMatchFailure(
         op, "Reduction dimension not divisible by split ratio");
   SmallVector<Operation *, 4> combinerOps;
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index fa2beca..5540eec 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -950,6 +950,80 @@ LogicalResult AtomicCaptureOp::verifyRegions() {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// Verifier for CancelOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult CancelOp::verify() {
+  ClauseCancellationConstructType cct = cancellation_construct_type_val();
+  Operation *parentOp = (*this)->getParentOp();
+
+  if (!parentOp) {
+    return emitOpError() << "must be used within a region supporting "
+                            "cancel directive";
+  }
+
+  if ((cct == ClauseCancellationConstructType::Parallel) &&
+      !isa<ParallelOp>(parentOp)) {
+    return emitOpError() << "cancel parallel must appear "
+                         << "inside a parallel region";
+  } else if (cct == ClauseCancellationConstructType::Loop) {
+    if (!isa<WsLoopOp>(parentOp)) {
+      return emitOpError() << "cancel loop must appear "
+                           << "inside a worksharing-loop region";
+    } else {
+      if (cast<WsLoopOp>(parentOp).nowaitAttr()) {
+        return emitError() << "A worksharing construct that is canceled "
+                           << "must not have a nowait clause";
+      } else if (cast<WsLoopOp>(parentOp).ordered_valAttr()) {
+        return emitError() << "A worksharing construct that is canceled "
+                           << "must not have an ordered clause";
+      }
+    }
+  } else if (cct == ClauseCancellationConstructType::Sections) {
+    if (!(isa<SectionsOp>(parentOp) || isa<SectionOp>(parentOp))) {
+      return emitOpError() << "cancel sections must appear "
+                           << "inside a sections region";
+    }
+    if (parentOp->getParentOp() && isa<SectionsOp>(parentOp->getParentOp()) &&
+        cast<SectionsOp>(parentOp->getParentOp()).nowaitAttr()) {
+      return emitError() << "A sections construct that is canceled "
+                         << "must not have a nowait clause";
+    }
+  }
+  // TODO : Add more when we support taskgroup.
+  return success();
+}
+//===----------------------------------------------------------------------===//
+// Verifier for CancelOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult CancellationPointOp::verify() {
+  ClauseCancellationConstructType cct = cancellation_construct_type_val();
+  Operation *parentOp = (*this)->getParentOp();
+
+  if (!parentOp) {
+    return emitOpError() << "must be used within a region supporting "
+                            "cancellation point directive";
+  }
+
+  if ((cct == ClauseCancellationConstructType::Parallel) &&
+      !(isa<ParallelOp>(parentOp))) {
+    return emitOpError() << "cancellation point parallel must appear "
+                         << "inside a parallel region";
+  } else if ((cct == ClauseCancellationConstructType::Loop) &&
+             !isa<WsLoopOp>(parentOp)) {
+    return emitOpError() << "cancellation point loop must appear "
+                         << "inside a worksharing-loop region";
+  } else if ((cct == ClauseCancellationConstructType::Sections) &&
+             !(isa<SectionsOp>(parentOp) || isa<SectionOp>(parentOp))) {
+    return emitOpError() << "cancellation point sections must appear "
+                         << "inside a sections region";
+  }
+  // TODO : Add more when we support taskgroup.
+  return success();
+}
+
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc"
 
diff --git a/mlir/lib/Dialect/PDLInterp/IR/PDLInterp.cpp b/mlir/lib/Dialect/PDLInterp/IR/PDLInterp.cpp
index 64a2be6..40e5599 100644
--- a/mlir/lib/Dialect/PDLInterp/IR/PDLInterp.cpp
+++ b/mlir/lib/Dialect/PDLInterp/IR/PDLInterp.cpp
@@ -47,6 +47,23 @@ static LogicalResult verifySwitchOp(OpT op) {
 // pdl_interp::CreateOperationOp
 //===----------------------------------------------------------------------===//
 
+LogicalResult CreateOperationOp::verify() {
+  if (!getInferredResultTypes())
+    return success();
+  if (!getInputResultTypes().empty()) {
+    return emitOpError("with inferred results cannot also have "
+                       "explicit result types");
+  }
+  OperationName opName(getName(), getContext());
+  if (!opName.hasInterface<InferTypeOpInterface>()) {
+    return emitOpError()
+           << "has inferred results, but the created operation '" << opName
+           << "' does not support result type inference (or is not "
+              "registered)";
+  }
+  return success();
+}
+
 static ParseResult parseCreateOperationOpAttributes(
     OpAsmParser &p,
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &attrOperands,
@@ -82,6 +99,41 @@ static void printCreateOperationOpAttributes(OpAsmPrinter &p,
   p << '}';
 }
 
+static ParseResult parseCreateOperationOpResults(
+    OpAsmParser &p,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &resultOperands,
+    SmallVectorImpl<Type> &resultTypes, UnitAttr &inferredResultTypes) {
+  if (failed(p.parseOptionalArrow()))
+    return success();
+
+  // Handle the case of inferred results.
+  if (succeeded(p.parseOptionalLess())) {
+    if (p.parseKeyword("inferred") || p.parseGreater())
+      return failure();
+    inferredResultTypes = p.getBuilder().getUnitAttr();
+    return success();
+  }
+
+  // Otherwise, parse the explicit results.
+  return failure(p.parseLParen() || p.parseOperandList(resultOperands) ||
+                 p.parseColonTypeList(resultTypes) || p.parseRParen());
+}
+
+static void printCreateOperationOpResults(OpAsmPrinter &p, CreateOperationOp op,
+                                          OperandRange resultOperands,
+                                          TypeRange resultTypes,
+                                          UnitAttr inferredResultTypes) {
+  // Handle the case of inferred results.
+  if (inferredResultTypes) {
+    p << " -> <inferred>";
+    return;
+  }
+
+  // Otherwise, handle the explicit results.
+  if (!resultTypes.empty())
+    p << " -> (" << resultOperands << " : " << resultTypes << ")";
+}
+
 //===----------------------------------------------------------------------===//
 // pdl_interp::ForEachOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp
index c96f573..3e11b67 100644
--- a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp
+++ b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp
@@ -41,6 +41,27 @@ transform::TransformState::getPayloadOps(Value value) const {
   return iter->getSecond();
 }
 
+Value transform::TransformState::getHandleForPayloadOp(Operation *op) const {
+  for (const Mappings &mapping : llvm::make_second_range(mappings)) {
+    if (Value handle = mapping.reverse.lookup(op))
+      return handle;
+  }
+  return Value();
+}
+
+LogicalResult transform::TransformState::tryEmplaceReverseMapping(
+    Mappings &map, Operation *operation, Value handle) {
+  auto insertionResult = map.reverse.insert({operation, handle});
+  if (!insertionResult.second) {
+    InFlightDiagnostic diag = operation->emitError()
+                              << "operation tracked by two handles";
+    diag.attachNote(handle.getLoc()) << "handle";
+    diag.attachNote(insertionResult.first->second.getLoc()) << "handle";
+    return diag;
+  }
+  return success();
+}
+
 LogicalResult
 transform::TransformState::setPayloadOps(Value value,
                                          ArrayRef<Operation *> targets) {
@@ -63,14 +84,8 @@ transform::TransformState::setPayloadOps(Value value,
   // expressed using the dialect and may be constructed by valid API calls from
   // valid IR. Emit an error here.
   for (Operation *op : targets) {
-    auto insertionResult = mappings.reverse.insert({op, value});
-    if (!insertionResult.second) {
-      InFlightDiagnostic diag = op->emitError()
-                                << "operation tracked by two handles";
-      diag.attachNote(value.getLoc()) << "handle";
-      diag.attachNote(insertionResult.first->second.getLoc()) << "handle";
-      return diag;
-    }
+    if (failed(tryEmplaceReverseMapping(mappings, op, value)))
+      return failure();
   }
 
   return success();
@@ -83,19 +98,26 @@ void transform::TransformState::removePayloadOps(Value value) {
   mappings.direct.erase(value);
 }
 
-void transform::TransformState::updatePayloadOps(
+LogicalResult transform::TransformState::updatePayloadOps(
     Value value, function_ref<Operation *(Operation *)> callback) {
-  auto it = getMapping(value).direct.find(value);
-  assert(it != getMapping(value).direct.end() && "unknown handle");
+  Mappings &mappings = getMapping(value);
+  auto it = mappings.direct.find(value);
+  assert(it != mappings.direct.end() && "unknown handle");
   SmallVector<Operation *> &association = it->getSecond();
   SmallVector<Operation *> updated;
   updated.reserve(association.size());
 
-  for (Operation *op : association)
-    if (Operation *updatedOp = callback(op))
+  for (Operation *op : association) {
+    mappings.reverse.erase(op);
+    if (Operation *updatedOp = callback(op)) {
       updated.push_back(updatedOp);
+      if (failed(tryEmplaceReverseMapping(mappings, updatedOp, value)))
+        return failure();
+    }
+  }
 
   std::swap(association, updated);
+  return success();
 }
 
 LogicalResult
@@ -132,8 +154,21 @@ transform::TransformState::applyTransform(TransformOpInterface transform) {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// TransformState::Extension
+//===----------------------------------------------------------------------===//
+
 transform::TransformState::Extension::~Extension() = default;
 
+LogicalResult
+transform::TransformState::Extension::replacePayloadOp(Operation *op,
+                                                       Operation *replacement) {
+  return state.updatePayloadOps(state.getHandleForPayloadOp(op),
+                                [&](Operation *current) {
+                                  return current == op ? replacement : current;
+                                });
+}
+
 //===----------------------------------------------------------------------===//
 // TransformResults
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/IR/BuiltinTypes.cpp b/mlir/lib/IR/BuiltinTypes.cpp
index d570052..841da97 100644
--- a/mlir/lib/IR/BuiltinTypes.cpp
+++ b/mlir/lib/IR/BuiltinTypes.cpp
@@ -945,13 +945,11 @@ MemRefType mlir::canonicalizeStridedLayout(MemRefType t) {
 AffineExpr mlir::makeCanonicalStridedLayoutExpr(ArrayRef<int64_t> sizes,
                                                 ArrayRef<AffineExpr> exprs,
                                                 MLIRContext *context) {
-  assert(!sizes.empty() && !exprs.empty() &&
-         "expected non-empty sizes and exprs");
-
   // Size 0 corner case is useful for canonicalizations.
-  if (llvm::is_contained(sizes, 0))
+  if (sizes.empty() || llvm::is_contained(sizes, 0))
     return getAffineConstantExpr(0, context);
 
+  assert(!exprs.empty() && "expected exprs");
   auto maps = AffineMap::inferFromExprList(exprs);
   assert(!maps.empty() && "Expected one non-empty map");
   unsigned numDims = maps[0].getNumDims(), nSymbols = maps[0].getNumSymbols();
diff --git a/mlir/lib/Rewrite/ByteCode.cpp b/mlir/lib/Rewrite/ByteCode.cpp
index c2dc41a..ad4c078 100644
--- a/mlir/lib/Rewrite/ByteCode.cpp
+++ b/mlir/lib/Rewrite/ByteCode.cpp
@@ -162,6 +162,10 @@ enum OpCode : ByteCodeField {
 };
 } // namespace
 
+/// A marker used to indicate if an operation should infer types.
+static constexpr ByteCodeField kInferTypesMarker =
+    std::numeric_limits<ByteCodeField>::max();
+
 //===----------------------------------------------------------------------===//
 // ByteCode Generation
 //===----------------------------------------------------------------------===//
@@ -273,7 +277,6 @@ private:
   void generate(pdl_interp::GetResultsOp op, ByteCodeWriter &writer);
   void generate(pdl_interp::GetUsersOp op, ByteCodeWriter &writer);
   void generate(pdl_interp::GetValueTypeOp op, ByteCodeWriter &writer);
-  void generate(pdl_interp::InferredTypesOp op, ByteCodeWriter &writer);
   void generate(pdl_interp::IsNotNullOp op, ByteCodeWriter &writer);
   void generate(pdl_interp::RecordMatchOp op, ByteCodeWriter &writer);
   void generate(pdl_interp::ReplaceOp op, ByteCodeWriter &writer);
@@ -723,8 +726,7 @@ void Generator::generate(Operation *op, ByteCodeWriter &writer) {
   LLVM_DEBUG({
     // The following list must contain all the operations that do not
     // produce any bytecode.
-    if (!isa<pdl_interp::CreateAttributeOp, pdl_interp::CreateTypeOp,
-             pdl_interp::InferredTypesOp>(op))
+    if (!isa<pdl_interp::CreateAttributeOp, pdl_interp::CreateTypeOp>(op))
       writer.appendInline(op->getLoc());
   });
   TypeSwitch<Operation *>(op)
@@ -742,11 +744,11 @@ void Generator::generate(Operation *op, ByteCodeWriter &writer) {
             pdl_interp::GetOperandOp, pdl_interp::GetOperandsOp,
             pdl_interp::GetResultOp, pdl_interp::GetResultsOp,
             pdl_interp::GetUsersOp, pdl_interp::GetValueTypeOp,
-            pdl_interp::InferredTypesOp, pdl_interp::IsNotNullOp,
-            pdl_interp::RecordMatchOp, pdl_interp::ReplaceOp,
-            pdl_interp::SwitchAttributeOp, pdl_interp::SwitchTypeOp,
-            pdl_interp::SwitchTypesOp, pdl_interp::SwitchOperandCountOp,
-            pdl_interp::SwitchOperationNameOp, pdl_interp::SwitchResultCountOp>(
+            pdl_interp::IsNotNullOp, pdl_interp::RecordMatchOp,
+            pdl_interp::ReplaceOp, pdl_interp::SwitchAttributeOp,
+            pdl_interp::SwitchTypeOp, pdl_interp::SwitchTypesOp,
+            pdl_interp::SwitchOperandCountOp, pdl_interp::SwitchOperationNameOp,
+            pdl_interp::SwitchResultCountOp>(
           [&](auto interpOp) { this->generate(interpOp, writer); })
       .Default([](Operation *) {
         llvm_unreachable("unknown `pdl_interp` operation");
@@ -847,7 +849,13 @@ void Generator::generate(pdl_interp::CreateOperationOp op,
   writer.append(static_cast<ByteCodeField>(attributes.size()));
   for (auto it : llvm::zip(op.getInputAttributeNames(), attributes))
     writer.append(std::get<0>(it), std::get<1>(it));
-  writer.appendPDLValueList(op.getInputResultTypes());
+
+  // Add the result types. If the operation has inferred results, we use a
+  // marker "size" value. Otherwise, we add the list of explicit result types.
+  if (op.getInferredResultTypes())
+    writer.append(kInferTypesMarker);
+  else
+    writer.appendPDLValueList(op.getInputResultTypes());
 }
 void Generator::generate(pdl_interp::CreateTypeOp op, ByteCodeWriter &writer) {
   // Simply repoint the memory index of the result to the constant.
@@ -955,12 +963,6 @@ void Generator::generate(pdl_interp::GetValueTypeOp op,
     writer.append(OpCode::GetValueType, op.getResult(), op.getValue());
   }
 }
-
-void Generator::generate(pdl_interp::InferredTypesOp op,
-                         ByteCodeWriter &writer) {
-  // InferType maps to a null type as a marker for inferring result types.
-  getMemIndex(op.getResult()) = getMemIndex(Type());
-}
 void Generator::generate(pdl_interp::IsNotNullOp op, ByteCodeWriter &writer) {
   writer.append(OpCode::IsNotNull, op.getValue(), op.getSuccessors());
 }
@@ -1526,30 +1528,31 @@ void ByteCodeExecutor::executeCreateOperation(PatternRewriter &rewriter,
       state.addAttribute(name, attr);
   }
 
-  for (unsigned i = 0, e = read(); i != e; ++i) {
-    if (read<PDLValue::Kind>() == PDLValue::Kind::Type) {
-      state.types.push_back(read<Type>());
-      continue;
-    }
-
-    // If we find a null range, this signals that the types are infered.
-    if (TypeRange *resultTypes = read<TypeRange *>()) {
-      state.types.append(resultTypes->begin(), resultTypes->end());
-      continue;
-    }
-
-    // Handle the case where the operation has inferred types.
+  // Read in the result types. If the "size" is the sentinel value, this
+  // indicates that the result types should be inferred.
+  unsigned numResults = read();
+  if (numResults == kInferTypesMarker) {
     InferTypeOpInterface::Concept *inferInterface =
         state.name.getRegisteredInfo()->getInterface<InferTypeOpInterface>();
+    assert(inferInterface &&
+           "expected operation to provide InferTypeOpInterface");
 
     // TODO: Handle failure.
-    state.types.clear();
     if (failed(inferInterface->inferReturnTypes(
             state.getContext(), state.location, state.operands,
             state.attributes.getDictionary(state.getContext()), state.regions,
             state.types)))
       return;
-    break;
+  } else {
+    // Otherwise, this is a fixed number of results.
+    for (unsigned i = 0; i != numResults; ++i) {
+      if (read<PDLValue::Kind>() == PDLValue::Kind::Type) {
+        state.types.push_back(read<Type>());
+      } else {
+        TypeRange *resultTypes = read<TypeRange *>();
+        state.types.append(resultTypes->begin(), resultTypes->end());
+      }
+    }
   }
 
   Operation *resultOp = rewriter.create(state);
diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
index 90c3d63..1a73456 100644
--- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
+++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
@@ -347,10 +347,14 @@ Attribute Importer::getConstantAsAttr(llvm::Constant *value) {
     if (c->isString())
       return b.getStringAttr(c->getAsString());
   if (auto *c = dyn_cast<llvm::ConstantFP>(value)) {
-    if (c->getType()->isDoubleTy())
-      return b.getFloatAttr(FloatType::getF64(context), c->getValueAPF());
-    if (c->getType()->isFloatingPointTy())
-      return b.getFloatAttr(FloatType::getF32(context), c->getValueAPF());
+    auto *type = c->getType();
+    FloatType floatTy;
+    if (type->isBFloatTy())
+      floatTy = FloatType::getBF16(context);
+    else
+      floatTy = getDLFloatType(*context, type->getScalarSizeInBits());
+    assert(floatTy && "unsupported floating point type");
+    return b.getFloatAttr(floatTy, c->getValueAPF());
   }
   if (auto *f = dyn_cast<llvm::Function>(value))
     return SymbolRefAttr::get(b.getContext(), f->getName());
@@ -607,7 +611,7 @@ static StringRef lookupOperationNameFromOpcode(unsigned opcode) {
       // FIXME: cleanuppad
       // FIXME: catchpad
       // ICmp is handled specially.
-      // FIXME: fcmp
+      // FCmp is handled specially.
       // PHI is handled specially.
       INST(Freeze, Freeze), INST(Call, Call),
       // FIXME: select
@@ -649,7 +653,47 @@ static ICmpPredicate getICmpPredicate(llvm::CmpInst::Predicate p) {
   case llvm::CmpInst::Predicate::ICMP_UGE:
     return LLVM::ICmpPredicate::uge;
   }
-  llvm_unreachable("incorrect comparison predicate");
+  llvm_unreachable("incorrect integer comparison predicate");
+}
+
+static FCmpPredicate getFCmpPredicate(llvm::CmpInst::Predicate p) {
+  switch (p) {
+  default:
+    llvm_unreachable("incorrect comparison predicate");
+  case llvm::CmpInst::Predicate::FCMP_FALSE:
+    return LLVM::FCmpPredicate::_false;
+  case llvm::CmpInst::Predicate::FCMP_TRUE:
+    return LLVM::FCmpPredicate::_true;
+  case llvm::CmpInst::Predicate::FCMP_OEQ:
+    return LLVM::FCmpPredicate::oeq;
+  case llvm::CmpInst::Predicate::FCMP_ONE:
+    return LLVM::FCmpPredicate::one;
+  case llvm::CmpInst::Predicate::FCMP_OLT:
+    return LLVM::FCmpPredicate::olt;
+  case llvm::CmpInst::Predicate::FCMP_OLE:
+    return LLVM::FCmpPredicate::ole;
+  case llvm::CmpInst::Predicate::FCMP_OGT:
+    return LLVM::FCmpPredicate::ogt;
+  case llvm::CmpInst::Predicate::FCMP_OGE:
+    return LLVM::FCmpPredicate::oge;
+  case llvm::CmpInst::Predicate::FCMP_ORD:
+    return LLVM::FCmpPredicate::ord;
+  case llvm::CmpInst::Predicate::FCMP_ULT:
+    return LLVM::FCmpPredicate::ult;
+  case llvm::CmpInst::Predicate::FCMP_ULE:
+    return LLVM::FCmpPredicate::ule;
+  case llvm::CmpInst::Predicate::FCMP_UGT:
+    return LLVM::FCmpPredicate::ugt;
+  case llvm::CmpInst::Predicate::FCMP_UGE:
+    return LLVM::FCmpPredicate::uge;
+  case llvm::CmpInst::Predicate::FCMP_UNO:
+    return LLVM::FCmpPredicate::uno;
+  case llvm::CmpInst::Predicate::FCMP_UEQ:
+    return LLVM::FCmpPredicate::ueq;
+  case llvm::CmpInst::Predicate::FCMP_UNE:
+    return LLVM::FCmpPredicate::une;
+  }
+  llvm_unreachable("incorrect floating point comparison predicate");
 }
 
 static AtomicOrdering getLLVMAtomicOrdering(llvm::AtomicOrdering ordering) {
@@ -774,6 +818,16 @@ LogicalResult Importer::processInstruction(llvm::Instruction *inst) {
         rhs);
     return success();
   }
+  case llvm::Instruction::FCmp: {
+    Value lhs = processValue(inst->getOperand(0));
+    Value rhs = processValue(inst->getOperand(1));
+    if (!lhs || !rhs)
+      return failure();
+    instMap[inst] = b.create<FCmpOp>(
+        loc, b.getI1Type(),
+        getFCmpPredicate(cast<llvm::FCmpInst>(inst)->getPredicate()), lhs, rhs);
+    return success();
+  }
   case llvm::Instruction::Br: {
     auto *brInst = cast<llvm::BranchInst>(inst);
     OperationState state(loc,
diff --git a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
index 8b21576..84fcea4 100644
--- a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
+++ b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
@@ -1052,6 +1052,20 @@ func.func @memref_copy_contiguous(%in: memref<16x2xi32>, %offset: index) {
 
 // -----
 
+// CHECK-LABEL: func @memref_copy_0d_offset
+#map0 = affine_map<(d0) -> (d0 + 1)>
+#map1 = affine_map<() -> (1)>
+func.func @memref_copy_0d_offset(%in: memref<2xi32>) {
+  %buf = memref.alloc() : memref<i32>
+  %sub = memref.subview %in[1] [1] [1] : memref<2xi32> to memref<1xi32, #map0>
+  %scalar = memref.collapse_shape %sub [] : memref<1xi32, #map0> into memref<i32, #map1>
+  memref.copy %scalar, %buf : memref<i32, #map1> to memref<i32>
+  // CHECK: llvm.intr.memcpy
+  return
+}
+
+// -----
+
 // CHECK-LABEL: func @memref_copy_noncontiguous
 #map = affine_map<(d0, d1)[s0] -> (d0 * 2 + s0 + d1)>
 func.func @memref_copy_noncontiguous(%in: memref<16x2xi32>, %offset: index) {
diff --git a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
index 9f3141a..8d18596 100644
--- a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
+++ b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
@@ -127,6 +127,29 @@ module @operation_infer_types_from_otherop_results {
 
 // -----
 
+// CHECK-LABEL: module @operation_infer_types_from_interface
+module @operation_infer_types_from_interface {
+  // Unused operation that ensures the arithmetic dialect is loaded for use in the pattern.
+  arith.constant true
+
+  // CHECK: module @rewriters
+  // CHECK:   func @pdl_generated_rewriter
+  // CHECK:     %[[CST:.*]] = pdl_interp.create_operation "arith.constant" -> <inferred>
+  // CHECK:     %[[CST_RES:.*]] = pdl_interp.get_results of %[[CST]] : !pdl.range<value>
+  // CHECK:     %[[CST_TYPE:.*]] = pdl_interp.get_value_type of %[[CST_RES]] : !pdl.range<type>
+  // CHECK:     pdl_interp.create_operation "foo.op"  -> (%[[CST_TYPE]] : !pdl.range<type>)
+  pdl.pattern : benefit(1) {
+    %root = operation "foo.op"
+    rewrite %root {
+      %types = types
+      %newOp = operation "arith.constant" -> (%types : !pdl.range<type>)
+      %newOp2 = operation "foo.op" -> (%types : !pdl.range<type>)
+    }
+  }
+}
+
+// -----
+
 // CHECK-LABEL: module @replace_with_op
 module @replace_with_op {
   // CHECK: module @rewriters
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index e88c7c4..61e44b0 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -1143,3 +1143,116 @@ func.func @omp_task(%mem: memref<1xf32>) {
   }
   return
 }
+
+// -----
+
+func @omp_cancel() {
+  omp.sections {
+    // expected-error @below {{cancel parallel must appear inside a parallel region}}
+    omp.cancel cancellation_construct_type(parallel)
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_cancel1() {
+  omp.parallel {
+    // expected-error @below {{cancel sections must appear inside a sections region}}
+    omp.cancel cancellation_construct_type(sections)
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_cancel2() {
+  omp.sections {
+    // expected-error @below {{cancel loop must appear inside a worksharing-loop region}}
+    omp.cancel cancellation_construct_type(loop)
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
+  omp.wsloop nowait
+    for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+    // expected-error @below {{A worksharing construct that is canceled must not have a nowait clause}}
+    omp.cancel cancellation_construct_type(loop)
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_cancel4(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
+  omp.wsloop ordered(1)
+    for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+    // expected-error @below {{A worksharing construct that is canceled must not have an ordered clause}}
+    omp.cancel cancellation_construct_type(loop)
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_cancel5() -> () {
+  omp.sections nowait {
+    omp.section {
+      // expected-error @below {{A sections construct that is canceled must not have a nowait clause}}
+      omp.cancel cancellation_construct_type(sections)
+      omp.terminator
+    }
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_cancellationpoint() {
+  omp.sections {
+    // expected-error @below {{cancellation point parallel must appear inside a parallel region}}
+    omp.cancellationpoint cancellation_construct_type(parallel)
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_cancellationpoint1() {
+  omp.parallel {
+    // expected-error @below {{cancellation point sections must appear inside a sections region}}
+    omp.cancellationpoint cancellation_construct_type(sections)
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_cancellationpoint2() {
+  omp.sections {
+    // expected-error @below {{cancellation point loop must appear inside a worksharing-loop region}}
+    omp.cancellationpoint cancellation_construct_type(loop)
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 57387b3..9dd76c4 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -51,15 +51,15 @@ func.func @omp_terminator() -> () {
   omp.terminator
 }
 
-func.func @omp_parallel(%data_var : memref<i32>, %if_cond : i1, %num_threads : si32) -> () {
-  // CHECK: omp.parallel if(%{{.*}}) num_threads(%{{.*}} : si32) allocate(%{{.*}} : memref<i32> -> %{{.*}} : memref<i32>)
+func.func @omp_parallel(%data_var : memref<i32>, %if_cond : i1, %num_threads : i32) -> () {
+  // CHECK: omp.parallel if(%{{.*}}) num_threads(%{{.*}} : i32) allocate(%{{.*}} : memref<i32> -> %{{.*}} : memref<i32>)
   "omp.parallel" (%if_cond, %num_threads, %data_var, %data_var) ({
 
   // test without if condition
-  // CHECK: omp.parallel num_threads(%{{.*}} : si32) allocate(%{{.*}} : memref<i32> -> %{{.*}} : memref<i32>)
+  // CHECK: omp.parallel num_threads(%{{.*}} : i32) allocate(%{{.*}} : memref<i32> -> %{{.*}} : memref<i32>)
     "omp.parallel"(%num_threads, %data_var, %data_var) ({
       omp.terminator
-    }) {operand_segment_sizes = dense<[0,1,1,1,0]> : vector<5xi32>} : (si32, memref<i32>, memref<i32>) -> ()
+    }) {operand_segment_sizes = dense<[0,1,1,1,0]> : vector<5xi32>} : (i32, memref<i32>, memref<i32>) -> ()
 
   // CHECK: omp.barrier
     omp.barrier
@@ -71,13 +71,13 @@ func.func @omp_parallel(%data_var : memref<i32>, %if_cond : i1, %num_threads : s
     }) {operand_segment_sizes = dense<[1,0,1,1,0]> : vector<5xi32>} : (i1, memref<i32>, memref<i32>) -> ()
 
   // test without allocate
-  // CHECK: omp.parallel if(%{{.*}}) num_threads(%{{.*}} : si32)
+  // CHECK: omp.parallel if(%{{.*}}) num_threads(%{{.*}} : i32)
     "omp.parallel"(%if_cond, %num_threads) ({
       omp.terminator
-    }) {operand_segment_sizes = dense<[1,1,0,0,0]> : vector<5xi32>} : (i1, si32) -> ()
+    }) {operand_segment_sizes = dense<[1,1,0,0,0]> : vector<5xi32>} : (i1, i32) -> ()
 
     omp.terminator
-  }) {operand_segment_sizes = dense<[1,1,1,1,0]> : vector<5xi32>, proc_bind_val = #omp<"procbindkind spread">} : (i1, si32, memref<i32>, memref<i32>) -> ()
+  }) {operand_segment_sizes = dense<[1,1,1,1,0]> : vector<5xi32>, proc_bind_val = #omp<"procbindkind spread">} : (i1, i32, memref<i32>, memref<i32>) -> ()
 
   // test with multiple parameters for single variadic argument
   // CHECK: omp.parallel allocate(%{{.*}} : memref<i32> -> %{{.*}} : memref<i32>)
@@ -88,14 +88,26 @@ func.func @omp_parallel(%data_var : memref<i32>, %if_cond : i1, %num_threads : s
   return
 }
 
-func.func @omp_parallel_pretty(%data_var : memref<i32>, %if_cond : i1, %num_threads : si32, %allocator : si32) -> () {
+func.func @omp_parallel_pretty(%data_var : memref<i32>, %if_cond : i1, %num_threads : i32, %allocator : si32) -> () {
  // CHECK: omp.parallel
  omp.parallel {
   omp.terminator
  }
 
- // CHECK: omp.parallel num_threads(%{{.*}} : si32)
- omp.parallel num_threads(%num_threads : si32) {
+ // CHECK: omp.parallel num_threads(%{{.*}} : i32)
+ omp.parallel num_threads(%num_threads : i32) {
+   omp.terminator
+ }
+
+ %n_index = arith.constant 2 : index
+ // CHECK: omp.parallel num_threads(%{{.*}} : index)
+ omp.parallel num_threads(%n_index : index) {
+   omp.terminator
+ }
+
+ %n_i64 = arith.constant 4 : i64
+ // CHECK: omp.parallel num_threads(%{{.*}} : i64)
+ omp.parallel num_threads(%n_i64 : i64) {
    omp.terminator
  }
 
@@ -113,8 +125,8 @@ func.func @omp_parallel_pretty(%data_var : memref<i32>, %if_cond : i1, %num_thre
    omp.terminator
  }
 
- // CHECK omp.parallel if(%{{.*}}) num_threads(%{{.*}} : si32) private(%{{.*}} : memref<i32>) proc_bind(close)
- omp.parallel num_threads(%num_threads : si32) if(%if_cond: i1) proc_bind(close) {
+ // CHECK omp.parallel if(%{{.*}}) num_threads(%{{.*}} : i32) private(%{{.*}} : memref<i32>) proc_bind(close)
+ omp.parallel num_threads(%num_threads : i32) if(%if_cond: i1) proc_bind(close) {
    omp.terminator
  }
 
@@ -347,14 +359,14 @@ func.func @omp_simdloop_pretty_multiple(%lb1 : index, %ub1 : index, %step1 : ind
 }
 
 // CHECK-LABEL: omp_target
-func.func @omp_target(%if_cond : i1, %device : si32,  %num_threads : si32) -> () {
+func.func @omp_target(%if_cond : i1, %device : si32,  %num_threads : i32) -> () {
 
     // Test with optional operands; if_expr, device, thread_limit, private, firstprivate and nowait.
     // CHECK: omp.target if({{.*}}) device({{.*}}) thread_limit({{.*}}) nowait
     "omp.target"(%if_cond, %device, %num_threads) ({
        // CHECK: omp.terminator
        omp.terminator
-    }) {nowait, operand_segment_sizes = dense<[1,1,1]>: vector<3xi32>} : ( i1, si32, si32 ) -> ()
+    }) {nowait, operand_segment_sizes = dense<[1,1,1]>: vector<3xi32>} : ( i1, si32, i32 ) -> ()
 
     // CHECK: omp.barrier
     omp.barrier
@@ -363,14 +375,14 @@ func.func @omp_target(%if_cond : i1, %device : si32,  %num_threads : si32) -> ()
 }
 
 // CHECK-LABEL: omp_target_pretty
-func.func @omp_target_pretty(%if_cond : i1, %device : si32,  %num_threads : si32) -> () {
+func.func @omp_target_pretty(%if_cond : i1, %device : si32,  %num_threads : i32) -> () {
     // CHECK: omp.target if({{.*}}) device({{.*}})
     omp.target if(%if_cond) device(%device : si32) {
       omp.terminator
     }
 
     // CHECK: omp.target if({{.*}}) device({{.*}}) nowait
-    omp.target if(%if_cond) device(%device : si32) thread_limit(%num_threads : si32) nowait {
+    omp.target if(%if_cond) device(%device : si32) thread_limit(%num_threads : i32) nowait {
       omp.terminator
     }
 
@@ -1264,3 +1276,77 @@ func.func @omp_threadprivate() {
 }
 
 llvm.mlir.global internal @_QFsubEx() : i32
+
+func @omp_cancel_parallel(%if_cond : i1) -> () {
+  // Test with optional operand; if_expr.
+  omp.parallel {
+    // CHECK: omp.cancel cancellation_construct_type(parallel) if(%{{.*}})
+    omp.cancel cancellation_construct_type(parallel) if(%if_cond)
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+func @omp_cancel_wsloop(%lb : index, %ub : index, %step : index) {
+  omp.wsloop
+  for (%iv) : index = (%lb) to (%ub) step (%step) {
+    // CHECK: omp.cancel cancellation_construct_type(loop)
+    omp.cancel cancellation_construct_type(loop)
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+func @omp_cancel_sections() -> () {
+  omp.sections {
+    omp.section {
+      // CHECK: omp.cancel cancellation_construct_type(sections)
+      omp.cancel cancellation_construct_type(sections)
+      omp.terminator
+    }
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+func @omp_cancellationpoint_parallel() -> () {
+  omp.parallel {
+    // CHECK: omp.cancellationpoint cancellation_construct_type(parallel)
+    omp.cancellationpoint cancellation_construct_type(parallel)
+    // CHECK: omp.cancel cancellation_construct_type(parallel)
+    omp.cancel cancellation_construct_type(parallel)
+    omp.terminator
+  }
+  return
+}
+
+func @omp_cancellationpoint_wsloop(%lb : index, %ub : index, %step : index) {
+  omp.wsloop
+  for (%iv) : index = (%lb) to (%ub) step (%step) {
+    // CHECK: omp.cancellationpoint cancellation_construct_type(loop)
+    omp.cancellationpoint cancellation_construct_type(loop)
+    // CHECK: omp.cancel cancellation_construct_type(loop)
+    omp.cancel cancellation_construct_type(loop)
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+func @omp_cancellationpoint_sections() -> () {
+  omp.sections {
+    omp.section {
+      // CHECK: omp.cancellationpoint cancellation_construct_type(sections)
+      omp.cancellationpoint cancellation_construct_type(sections)
+      // CHECK: omp.cancel cancellation_construct_type(sections)
+      omp.cancel cancellation_construct_type(sections)
+      omp.terminator
+    }
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
diff --git a/mlir/test/Dialect/PDLInterp/invalid.mlir b/mlir/test/Dialect/PDLInterp/invalid.mlir
new file mode 100644
index 0000000..e44625d
--- /dev/null
+++ b/mlir/test/Dialect/PDLInterp/invalid.mlir
@@ -0,0 +1,26 @@
+// RUN: mlir-opt %s -split-input-file -verify-diagnostics
+
+//===----------------------------------------------------------------------===//
+// pdl::CreateOperationOp
+//===----------------------------------------------------------------------===//
+
+pdl_interp.func @rewriter() {
+  // expected-error@+1 {{op has inferred results, but the created operation 'foo.op' does not support result type inference}}
+  %op = pdl_interp.create_operation "foo.op" -> <inferred>
+  pdl_interp.finalize
+}
+
+// -----
+
+pdl_interp.func @rewriter() {
+  %type = pdl_interp.create_type i32
+  // expected-error@+1 {{op with inferred results cannot also have explicit result types}}
+  %op = "pdl_interp.create_operation"(%type) {
+    inferredResultTypes,
+    inputAttributeNames = [],
+    name = "foo.op",
+    operand_segment_sizes = dense<[0, 0, 1]> : vector<3xi32>
+  } : (!pdl.type) -> (!pdl.operation)
+  pdl_interp.finalize
+}
+
diff --git a/mlir/test/Dialect/PDLInterp/ops.mlir b/mlir/test/Dialect/PDLInterp/ops.mlir
index 52b711a..ef9cefe 100644
--- a/mlir/test/Dialect/PDLInterp/ops.mlir
+++ b/mlir/test/Dialect/PDLInterp/ops.mlir
@@ -6,6 +6,10 @@
 
 // -----
 
+// Unused operation to force loading the `arithmetic` dialect for the
+// test of type inferrence.
+arith.constant true
+
 func.func @operations(%attribute: !pdl.attribute,
                  %input: !pdl.value,
                  %type: !pdl.type) {
@@ -21,6 +25,9 @@ func.func @operations(%attribute: !pdl.attribute,
   // operands, and results
   %op3 = pdl_interp.create_operation "foo.op"(%input : !pdl.value) -> (%type : !pdl.type)
 
+  // inferred results
+  %op4 = pdl_interp.create_operation "arith.constant" -> <inferred>
+
   pdl_interp.finalize
 }
 
diff --git a/mlir/test/Dialect/Transform/transform-state-extension.mlir b/mlir/test/Dialect/Transform/transform-state-extension.mlir
new file mode 100644
index 0000000..c63d16d
--- /dev/null
+++ b/mlir/test/Dialect/Transform/transform-state-extension.mlir
@@ -0,0 +1,46 @@
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -verify-diagnostics -split-input-file
+
+// expected-note @below {{associated payload op}}
+module {
+  transform.sequence {
+  ^bb0(%arg0: !pdl.operation):
+    // expected-remark @below {{extension absent}}
+    test_check_if_test_extension_present %arg0
+    test_add_test_extension "A"
+    // expected-remark @below {{extension present, A}}
+    test_check_if_test_extension_present %arg0
+    test_remove_test_extension
+    // expected-remark @below {{extension absent}}
+    test_check_if_test_extension_present %arg0
+  }
+}
+
+// -----
+
+// expected-note @below {{associated payload op}}
+module {
+  transform.sequence {
+  ^bb0(%arg0: !pdl.operation):
+    test_add_test_extension "A"
+    test_remove_test_extension
+    test_add_test_extension "B"
+    // expected-remark @below {{extension present, B}}
+    test_check_if_test_extension_present %arg0
+  }
+}
+
+// -----
+
+// expected-note @below {{associated payload op}}
+module {
+  transform.sequence {
+  ^bb0(%arg0: !pdl.operation):
+    test_add_test_extension "A"
+    // expected-remark @below {{extension present, A}}
+    test_check_if_test_extension_present %arg0
+    // expected-note @below {{associated payload op}}
+    test_remap_operand_to_self %arg0
+    // expected-remark @below {{extension present, A}}
+    test_check_if_test_extension_present %arg0
+  }
+}
diff --git a/mlir/test/Rewrite/pdl-bytecode.mlir b/mlir/test/Rewrite/pdl-bytecode.mlir
index e1a8c60..aed1bbc 100644
--- a/mlir/test/Rewrite/pdl-bytecode.mlir
+++ b/mlir/test/Rewrite/pdl-bytecode.mlir
@@ -531,6 +531,41 @@ module @ir attributes { test.check_types_1 } {
 // pdl_interp::CreateOperationOp
 //===----------------------------------------------------------------------===//
 
+// Unused operation to force loading the `arithmetic` dialect for the
+// test of type inferrence.
+arith.constant 10
+
+// Test support for inferring the types of an operation.
+module @patterns {
+  pdl_interp.func @matcher(%root : !pdl.operation) {
+    pdl_interp.check_operation_name of %root is "test.op" -> ^pat, ^end
+
+  ^pat:
+    pdl_interp.record_match @rewriters::@success(%root : !pdl.operation) : benefit(1), loc([%root]) -> ^end
+
+  ^end:
+    pdl_interp.finalize
+  }
+
+  module @rewriters {
+    pdl_interp.func @success(%root : !pdl.operation) {
+      %attr = pdl_interp.create_attribute true
+      %cst = pdl_interp.create_operation "arith.constant" {"value" = %attr} -> <inferred>
+      %cstResults = pdl_interp.get_results of %cst : !pdl.range<value>
+      %op = pdl_interp.create_operation "test.success"(%cstResults : !pdl.range<value>)
+      pdl_interp.erase %root
+      pdl_interp.finalize
+    }
+  }
+}
+
+// CHECK-LABEL: test.create_op_infer_results
+// CHECK: %[[CST:.*]] = arith.constant true
+// CHECK: "test.success"(%[[CST]])
+module @ir attributes { test.create_op_infer_results } {
+  %results:2 = "test.op"() : () -> (i64, i64)
+}
+
 // -----
 
 //===----------------------------------------------------------------------===//
@@ -1182,12 +1217,6 @@ module @ir attributes { test.get_results_2 } {
 // Fully tested within the tests for other operations.
 
 //===----------------------------------------------------------------------===//
-// pdl_interp::InferredTypesOp
-//===----------------------------------------------------------------------===//
-
-// Fully tested within the tests for other operations.
-
-//===----------------------------------------------------------------------===//
 // pdl_interp::IsNotNullOp
 //===----------------------------------------------------------------------===//
 
diff --git a/mlir/test/Target/LLVMIR/Import/basic.ll b/mlir/test/Target/LLVMIR/Import/basic.ll
index 46566a9..8b77c69 100644
--- a/mlir/test/Target/LLVMIR/Import/basic.ll
+++ b/mlir/test/Target/LLVMIR/Import/basic.ll
@@ -281,6 +281,62 @@ define void @FPArithmetic(float %a, float %b, double %c, double %d) {
   ret void
 }
 
+; CHECK-LABEL: llvm.func @FPComparison(%arg0: f32, %arg1: f32)
+define void @FPComparison(float %a, float %b) {
+  ; CHECK: llvm.fcmp "_false" %arg0, %arg1
+  %1 = fcmp false float %a, %b
+  ; CHECK: llvm.fcmp "oeq" %arg0, %arg1
+  %2 = fcmp oeq float %a, %b
+  ; CHECK: llvm.fcmp "ogt" %arg0, %arg1
+  %3 = fcmp ogt float %a, %b
+  ; CHECK: llvm.fcmp "oge" %arg0, %arg1
+  %4 = fcmp oge float %a, %b
+  ; CHECK: llvm.fcmp "olt" %arg0, %arg1
+  %5 = fcmp olt float %a, %b
+  ; CHECK: llvm.fcmp "ole" %arg0, %arg1
+  %6 = fcmp ole float %a, %b
+  ; CHECK: llvm.fcmp "one" %arg0, %arg1
+  %7 = fcmp one float %a, %b
+  ; CHECK: llvm.fcmp "ord" %arg0, %arg1
+  %8 = fcmp ord float %a, %b
+  ; CHECK: llvm.fcmp "ueq" %arg0, %arg1
+  %9 = fcmp ueq float %a, %b
+  ; CHECK: llvm.fcmp "ugt" %arg0, %arg1
+  %10 = fcmp ugt float %a, %b
+  ; CHECK: llvm.fcmp "uge" %arg0, %arg1
+  %11 = fcmp uge float %a, %b
+  ; CHECK: llvm.fcmp "ult" %arg0, %arg1
+  %12 = fcmp ult float %a, %b
+  ; CHECK: llvm.fcmp "ule" %arg0, %arg1
+  %13 = fcmp ule float %a, %b
+  ; CHECK: llvm.fcmp "une" %arg0, %arg1
+  %14 = fcmp une float %a, %b
+  ; CHECK: llvm.fcmp "uno" %arg0, %arg1
+  %15 = fcmp uno float %a, %b
+  ; CHECK: llvm.fcmp "_true" %arg0, %arg1
+  %16 = fcmp true float %a, %b
+  ret void
+}
+
+; Testing rest of the floating point constant kinds.
+; CHECK-LABEL: llvm.func @FPConstant(%arg0: f16, %arg1: bf16, %arg2: f128, %arg3: f80)
+define void @FPConstant(half %a, bfloat %b, fp128 %c, x86_fp80 %d) {
+  ; CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(7.000000e+00 : f80) : f80
+  ; CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(0.000000e+00 : f128) : f128
+  ; CHECK-DAG: %[[C2:.+]] = llvm.mlir.constant(1.000000e+00 : bf16) : bf16
+  ; CHECK-DAG: %[[C3:.+]] = llvm.mlir.constant(1.000000e+00 : f16) : f16
+
+  ; CHECK: llvm.fadd %[[C3]], %arg0  : f16
+  %1 = fadd half 1.0, %a
+  ; CHECK: llvm.fadd %[[C2]], %arg1  : bf16
+  %2 = fadd bfloat 1.0, %b
+  ; CHECK: llvm.fadd %[[C1]], %arg2  : f128
+  %3 = fadd fp128 0xL00000000000000000000000000000000, %c
+  ; CHECK: llvm.fadd %[[C0]], %arg3  : f80
+  %4 = fadd x86_fp80 0xK4001E000000000000000, %d
+  ret void
+}
+
 ;
 ; Functions as constants.
 ;
diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp
index a58f12d..c9687fa 100644
--- a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp
+++ b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "TestTransformDialectExtension.h"
+#include "TestTransformStateExtension.h"
 #include "mlir/Dialect/PDL/IR/PDL.h"
 #include "mlir/Dialect/Transform/IR/TransformDialect.h"
 #include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
-#include "mlir/IR/Builders.h"
 #include "mlir/IR/OpImplementation.h"
 
 using namespace mlir;
@@ -142,6 +142,49 @@ LogicalResult mlir::test::TestPrintRemarkAtOperandOp::apply(
   return success();
 }
 
+LogicalResult
+mlir::test::TestAddTestExtensionOp::apply(transform::TransformResults &results,
+                                          transform::TransformState &state) {
+  state.addExtension<TestTransformStateExtension>(getMessageAttr());
+  return success();
+}
+
+LogicalResult mlir::test::TestCheckIfTestExtensionPresentOp::apply(
+    transform::TransformResults &results, transform::TransformState &state) {
+  auto *extension = state.getExtension<TestTransformStateExtension>();
+  if (!extension) {
+    emitRemark() << "extension absent";
+    return success();
+  }
+
+  InFlightDiagnostic diag = emitRemark()
+                            << "extension present, " << extension->getMessage();
+  for (Operation *payload : state.getPayloadOps(getOperand())) {
+    diag.attachNote(payload->getLoc()) << "associated payload op";
+    assert(state.getHandleForPayloadOp(payload) == getOperand() &&
+           "inconsistent mapping between transform IR handles and payload IR "
+           "operations");
+  }
+
+  return success();
+}
+
+LogicalResult mlir::test::TestRemapOperandPayloadToSelfOp::apply(
+    transform::TransformResults &results, transform::TransformState &state) {
+  auto *extension = state.getExtension<TestTransformStateExtension>();
+  if (!extension)
+    return emitError() << "TestTransformStateExtension missing";
+
+  return extension->updateMapping(state.getPayloadOps(getOperand()).front(),
+                                  getOperation());
+}
+
+LogicalResult mlir::test::TestRemoveTestExtensionOp::apply(
+    transform::TransformResults &results, transform::TransformState &state) {
+  state.removeExtension<TestTransformStateExtension>();
+  return success();
+}
+
 namespace {
 /// Test extension of the Transform dialect. Registers additional ops and
 /// declares PDL as dependent dialect since the additional ops are using PDL
diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.td b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.td
index 6fe34ae..d33f790 100644
--- a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.td
+++ b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.td
@@ -56,4 +56,41 @@ def TestPrintRemarkAtOperandOp
   let cppNamespace = "::mlir::test";
 }
 
+def TestAddTestExtensionOp
+  : Op<Transform_Dialect, "test_add_test_extension",
+       [DeclareOpInterfaceMethods<TransformOpInterface>,
+        NoSideEffect]> {
+  let arguments = (ins StrAttr:$message);
+  let assemblyFormat = "$message attr-dict";
+  let cppNamespace = "::mlir::test";
+}
+
+def TestCheckIfTestExtensionPresentOp
+  : Op<Transform_Dialect, "test_check_if_test_extension_present",
+       [DeclareOpInterfaceMethods<TransformOpInterface>]> {
+  let arguments = (ins
+    Arg<PDL_Operation, "", [TransformMappingRead, PayloadIRRead]>:$operand);
+  let assemblyFormat = "$operand attr-dict";
+  let cppNamespace = "::mlir::test";
+}
+
+def TestRemapOperandPayloadToSelfOp
+  : Op<Transform_Dialect, "test_remap_operand_to_self",
+       [DeclareOpInterfaceMethods<TransformOpInterface>]> {
+  let arguments = (ins
+    Arg<PDL_Operation, "",
+        [TransformMappingRead, TransformMappingWrite, PayloadIRRead]>:$operand);
+  let assemblyFormat = "$operand attr-dict";
+  let cppNamespace = "::mlir::test";
+}
+
+def TestRemoveTestExtensionOp
+  : Op<Transform_Dialect, "test_remove_test_extension",
+       [DeclareOpInterfaceMethods<TransformOpInterface>,
+        NoSideEffect]> {
+  let assemblyFormat = "attr-dict";
+  let cppNamespace = "::mlir::test";
+}
+
+
 #endif // MLIR_TESTTRANSFORMDIALECTEXTENSION_TD
diff --git a/mlir/test/lib/Dialect/Transform/TestTransformStateExtension.h b/mlir/test/lib/Dialect/Transform/TestTransformStateExtension.h
new file mode 100644
index 0000000..3b2eb76
--- /dev/null
+++ b/mlir/test/lib/Dialect/Transform/TestTransformStateExtension.h
@@ -0,0 +1,42 @@
+//===- TestTransformStateExtension.h - Test Utility -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an TransformState extension for the purpose of testing the
+// relevant APIs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TEST_LIB_DIALECT_TRANSFORM_TESTTRANSFORMSTATEEXTENSION_H
+#define MLIR_TEST_LIB_DIALECT_TRANSFORM_TESTTRANSFORMSTATEEXTENSION_H
+
+#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
+
+using namespace mlir;
+
+namespace mlir {
+namespace test {
+class TestTransformStateExtension
+    : public transform::TransformState::Extension {
+public:
+  TestTransformStateExtension(transform::TransformState &state,
+                              StringAttr message)
+      : Extension(state), message(message) {}
+
+  StringRef getMessage() const { return message.getValue(); }
+
+  LogicalResult updateMapping(Operation *previous, Operation *updated) {
+    return replacePayloadOp(previous, updated);
+  }
+
+private:
+  StringAttr message;
+};
+} // namespace test
+} // namespace mlir
+
+#endif // MLIR_TEST_LIB_DIALECT_TRANSFORM_TESTTRANSFORMSTATEEXTENSION_H
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 96f0a3f..eb90010 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -781,6 +781,8 @@ cc_library(
 cc_library(
     name = "analysis",
     srcs = glob([
+        "lib/Analysis/FlowSensitive/Models/*.cpp",
+        "lib/Analysis/FlowSensitive/*.cpp",
         "lib/Analysis/*.cpp",
         "lib/Analysis/*.h",
     ]),
diff --git a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel
index ca38c39..dc625ef 100644
--- a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel
@@ -117,6 +117,31 @@ cc_test(
 )
 
 cc_test(
+    name = "analysis_flow_sensitive_tests",
+    srcs = glob(
+        [
+            "Analysis/FlowSensitive/*.cpp",
+            "Analysis/FlowSensitive/*.h",
+        ],
+        allow_empty = False,
+    ),
+    deps = [
+        "//clang:analysis",
+        "//clang:ast",
+        "//clang:ast_matchers",
+        "//clang:basic",
+        "//clang:lex",
+        "//clang:serialization",
+        "//clang:tooling",
+        "//llvm:Support",
+        "//llvm:TestingSupport",
+        "//llvm:gmock",
+        "//llvm:gtest",
+        "//llvm:gtest_main",
+    ],
+)
+
+cc_test(
     name = "basic_tests",
     size = "small",
     srcs = glob(