256 files changed, 15737 insertions, 3761 deletions
diff --git a/llvm/docs/CommandGuide/lit.rst b/llvm/docs/CommandGuide/lit.rst
index 70daae4..bbc1497 100644
--- a/llvm/docs/CommandGuide/lit.rst
+++ b/llvm/docs/CommandGuide/lit.rst
@@ -628,7 +628,7 @@ TestRunner.py:
  %{fs-src-root}          root component of file system paths pointing to the LLVM checkout
  %{fs-tmp-root}          root component of file system paths pointing to the test's temporary directory
  %{fs-sep}               file system path separator
- %t                      temporary file name unique to the test
+ %t                      a path unique to the test (which may be used to make files or directories)
  %basename_t             The last path component of %t but without the ``.tmp`` extension (deprecated, use ``%{t:stem}`` instead)
  %%                      %
  %/s                     %s but ``\`` is replaced by ``/``
diff --git a/llvm/include/llvm/Analysis/InstSimplifyFolder.h b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
index 58793ed..2832beb 100644
--- a/llvm/include/llvm/Analysis/InstSimplifyFolder.h
+++ b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
@@ -120,7 +120,7 @@ public:
   }
 
   Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
-                             Instruction *FMFSource) const override {
+                             Instruction *FMFSource = nullptr) const override {
     return simplifyBinaryIntrinsic(ID, Ty, LHS, RHS, SQ,
                                    dyn_cast_if_present<CallBase>(FMFSource));
   }
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 93aff35..b0601eb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -276,7 +276,6 @@ public:
                                  SmallVector<Register> &Ops) const;
 
   /// Replace \p MI with a build_vector.
-  bool matchCombineShuffleToBuildVector(MachineInstr &MI) const;
   void applyCombineShuffleToBuildVector(MachineInstr &MI) const;
 
   /// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
@@ -295,8 +294,6 @@ public:
   /// Replace \p MI with a concat_vectors with \p Ops.
   void applyCombineShuffleVector(MachineInstr &MI,
                                  const ArrayRef<Register> Ops) const;
-  bool matchShuffleToExtract(MachineInstr &MI) const;
-  void applyShuffleToExtract(MachineInstr &MI) const;
 
   /// Optimize memcpy intrinsics et al, e.g. constant len calls.
   /// /p MaxLen if non-zero specifies the max length of a mem libcall to inline.
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 64a7563..a70c9c0 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2127,7 +2127,7 @@ public:
   /// performs validation and error handling, returns the function. Otherwise,
   /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
   /// Should be used only when getIRStackGuard returns nullptr.
-  virtual Function *getSSPStackGuardCheck(const Module &M) const;
+  Function *getSSPStackGuardCheck(const Module &M) const;
 
 protected:
   Value *getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SymbolFilter.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SymbolFilter.h
new file mode 100644
index 0000000..5170893
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SymbolFilter.h
@@ -0,0 +1,173 @@
+//===- SymbolFilter.h - Utilities for Symbol Filtering ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_SYMBOLFILTER_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_SYMBOLFILTER_H
+
+#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h"
+
+#include <cmath>
+#include <type_traits>
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+namespace shared {
+using SPSBloomFilter =
+    SPSTuple<bool, uint32_t, uint32_t, uint32_t, SPSSequence<uint64_t>>;
+}
+
+class BloomFilter {
+public:
+  using HashFunc = std::function<uint32_t(StringRef)>;
+
+  BloomFilter() = default;
+  BloomFilter(BloomFilter &&) noexcept = default;
+  BloomFilter &operator=(BloomFilter &&) noexcept = default;
+  BloomFilter(const BloomFilter &) = delete;
+  BloomFilter &operator=(const BloomFilter &) = delete;
+
+  BloomFilter(uint32_t SymbolCount, float FalsePositiveRate, HashFunc hashFn)
+      : HashFn(std::move(hashFn)) {
+    initialize(SymbolCount, FalsePositiveRate);
+  }
+  bool isInitialized() const { return Initialized; }
+
+  void add(StringRef Sym) {
+    assert(Initialized);
+    addHash(HashFn(Sym));
+  }
+
+  bool mayContain(StringRef Sym) const {
+    return !isEmpty() && testHash(HashFn(Sym));
+  }
+
+  bool isEmpty() const { return SymbolCount == 0; }
+
+private:
+  friend class shared::SPSSerializationTraits<shared::SPSBloomFilter,
+                                              BloomFilter>;
+  static constexpr uint32_t BitsPerEntry = 64;
+
+  bool Initialized = false;
+  uint32_t SymbolCount = 0;
+  uint32_t BloomSize = 0;
+  uint32_t BloomShift = 0;
+  std::vector<uint64_t> BloomTable;
+  HashFunc HashFn;
+
+  void initialize(uint32_t SymCount, float FalsePositiveRate) {
+    assert(SymCount > 0);
+    SymbolCount = SymCount;
+    Initialized = true;
+
+    float ln2 = std::log(2.0f);
+    float M = -1.0f * SymbolCount * std::log(FalsePositiveRate) / (ln2 * ln2);
+    BloomSize = static_cast<uint32_t>(std::ceil(M / BitsPerEntry));
+    BloomShift = std::min(6u, log2ceil(SymbolCount));
+    BloomTable.resize(BloomSize, 0);
+  }
+
+  void addHash(uint32_t Hash) {
+    uint32_t Hash2 = Hash >> BloomShift;
+    uint32_t N = (Hash / BitsPerEntry) % BloomSize;
+    uint64_t Mask =
+        (1ULL << (Hash % BitsPerEntry)) | (1ULL << (Hash2 % BitsPerEntry));
+    BloomTable[N] |= Mask;
+  }
+
+  bool testHash(uint32_t Hash) const {
+    uint32_t Hash2 = Hash >> BloomShift;
+    uint32_t N = (Hash / BitsPerEntry) % BloomSize;
+    uint64_t Mask =
+        (1ULL << (Hash % BitsPerEntry)) | (1ULL << (Hash2 % BitsPerEntry));
+    return (BloomTable[N] & Mask) == Mask;
+  }
+
+  static constexpr uint32_t log2ceil(uint32_t V) {
+    return V <= 1 ? 0 : 32 - countl_zero(V - 1);
+  }
+};
+
+class BloomFilterBuilder {
+public:
+  using HashFunc = BloomFilter::HashFunc;
+
+  BloomFilterBuilder() = default;
+
+  BloomFilterBuilder &setFalsePositiveRate(float Rate) {
+    assert(Rate > 0.0f && Rate < 1.0f);
+    FalsePositiveRate = Rate;
+    return *this;
+  }
+
+  BloomFilterBuilder &setHashFunction(HashFunc Fn) {
+    HashFn = std::move(Fn);
+    return *this;
+  }
+
+  BloomFilter build(ArrayRef<StringRef> Symbols) const {
+    assert(!Symbols.empty() && "Cannot build filter from empty symbol list.");
+    BloomFilter F(static_cast<uint32_t>(Symbols.size()), FalsePositiveRate,
+                  HashFn);
+    for (const auto &Sym : Symbols)
+      F.add(Sym);
+
+    return F;
+  }
+
+private:
+  float FalsePositiveRate = 0.02f;
+  HashFunc HashFn = [](StringRef S) -> uint32_t {
+    uint32_t H = 5381;
+    for (char C : S)
+      H = ((H << 5) + H) + static_cast<uint8_t>(C); // H * 33 + C
+    return H;
+  };
+};
+
+namespace shared {
+
+template <> class SPSSerializationTraits<SPSBloomFilter, BloomFilter> {
+public:
+  static size_t size(const BloomFilter &Filter) {
+    return SPSBloomFilter::AsArgList::size(
+        Filter.Initialized, Filter.SymbolCount, Filter.BloomSize,
+        Filter.BloomShift, Filter.BloomTable);
+  }
+
+  static bool serialize(SPSOutputBuffer &OB, const BloomFilter &Filter) {
+    return SPSBloomFilter::AsArgList::serialize(
+        OB, Filter.Initialized, Filter.SymbolCount, Filter.BloomSize,
+        Filter.BloomShift, Filter.BloomTable);
+  }
+
+  static bool deserialize(SPSInputBuffer &IB, BloomFilter &Filter) {
+    bool IsInitialized;
+    uint32_t SymbolCount = 0, BloomSize = 0, BloomShift = 0;
+    std::vector<uint64_t> BloomTable;
+
+    if (!SPSBloomFilter::AsArgList::deserialize(
+            IB, IsInitialized, SymbolCount, BloomSize, BloomShift, BloomTable))
+      return false;
+
+    Filter.Initialized = IsInitialized;
+    Filter.SymbolCount = SymbolCount;
+    Filter.BloomSize = BloomSize;
+    Filter.BloomShift = BloomShift;
+    Filter.BloomTable = std::move(BloomTable);
+
+    return true;
+  }
+};
+
+} // end namespace shared
+} // end namespace orc
+} // end namespace llvm
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_SYMBOLFILTER_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h
new file mode 100644
index 0000000..50d4f6d041
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h
@@ -0,0 +1,514 @@
+//===- LibraryResolver.h - Automatic Library Symbol Resolution -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides support for automatically searching symbols across
+// dynamic libraries that have not yet been loaded.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_LIBRARYRESOLVER_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_LIBRARYRESOLVER_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SymbolFilter.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h"
+#include "llvm/Support/Path.h"
+
+#include <atomic>
+#include <shared_mutex>
+#include <unordered_map>
+
+namespace llvm {
+namespace orc {
+
+/// Manages library metadata and state for symbol resolution.
+///
+/// Tracks libraries by load state and kind (user/system), and stores
+/// associated Bloom filters and hash maps to speed up symbol lookups.
+/// Thread-safe for concurrent access.
+class LibraryManager {
+public:
+  enum class LibState : uint8_t { Unloaded = 0, Loaded = 1, Queried = 2 };
+
+  class LibraryInfo {
+  public:
+    LibraryInfo(const LibraryInfo &) = delete;
+    LibraryInfo &operator=(const LibraryInfo &) = delete;
+
+    LibraryInfo(std::string FilePath, LibState S, PathType K,
+                std::optional<BloomFilter> Filter = std::nullopt)
+        : FilePath(std::move(FilePath)), S(S), K(K), Filter(std::move(Filter)) {
+    }
+
+    StringRef getBasePath() const { return sys::path::parent_path(FilePath); }
+    StringRef getFileName() const { return sys::path::filename(FilePath); }
+
+    std::string getFullPath() const { return FilePath; }
+
+    bool setFilter(BloomFilter F) {
+      std::lock_guard<std::shared_mutex> Lock(Mtx);
+      if (Filter)
+        return false;
+      Filter.emplace(std::move(F));
+      return true;
+    }
+
+    bool ensureFilterBuilt(const BloomFilterBuilder &FB,
+                           ArrayRef<StringRef> Symbols) {
+      std::lock_guard<std::shared_mutex> Lock(Mtx);
+      if (Filter)
+        return false;
+      Filter.emplace(FB.build(Symbols));
+      return true;
+    }
+
+    bool mayContain(StringRef Symbol) const {
+      assert(hasFilter());
+      std::shared_lock<std::shared_mutex> Lock(Mtx);
+      return Filter->mayContain(Symbol);
+    }
+
+    bool hasFilter() const {
+      std::shared_lock<std::shared_mutex> Lock(Mtx);
+      return Filter.has_value();
+    }
+
+    LibState getState() const { return S.load(); }
+    PathType getKind() const { return K; }
+
+    void setState(LibState s) { S.store(s); }
+
+    bool operator==(const LibraryInfo &other) const {
+      return FilePath == other.FilePath;
+    }
+
+  private:
+    std::string FilePath;
+    std::atomic<LibState> S;
+    PathType K;
+    std::optional<BloomFilter> Filter;
+    mutable std::shared_mutex Mtx;
+  };
+
+  /// A read-only view of libraries filtered by state and kind.
+  ///
+  /// Lets you loop over only the libraries in a map that match a given State
+  /// and PathType.
+  class FilteredView {
+  public:
+    using Map = StringMap<std::shared_ptr<LibraryInfo>>;
+    using Iterator = typename Map::const_iterator;
+    class FilterIterator {
+    public:
+      FilterIterator(Iterator it_, Iterator end_, LibState S, PathType K)
+          : it(it_), end(end_), S(S), K(K) {
+        advance();
+      }
+
+      bool operator!=(const FilterIterator &other) const {
+        return it != other.it;
+      }
+
+      const std::shared_ptr<LibraryInfo> &operator*() const {
+        return it->second;
+      }
+
+      FilterIterator &operator++() {
+        ++it;
+        advance();
+        return *this;
+      }
+
+    private:
+      void advance() {
+        for (; it != end; ++it)
+          if (it->second->getState() == S && it->second->getKind() == K)
+            break;
+      }
+      Iterator it;
+      Iterator end;
+      LibState S;
+      PathType K;
+    };
+    FilteredView(Iterator begin, Iterator end, LibState s, PathType k)
+        : mapBegin(begin), mapEnd(end), state(s), kind(k) {}
+
+    FilterIterator begin() const {
+      return FilterIterator(mapBegin, mapEnd, state, kind);
+    }
+
+    FilterIterator end() const {
+      return FilterIterator(mapEnd, mapEnd, state, kind);
+    }
+
+  private:
+    Iterator mapBegin;
+    Iterator mapEnd;
+    LibState state;
+    PathType kind;
+  };
+
+private:
+  StringMap<std::shared_ptr<LibraryInfo>> Libraries;
+  mutable std::shared_mutex Mtx;
+
+public:
+  using LibraryVisitor = std::function<bool(const LibraryInfo &)>;
+
+  LibraryManager() = default;
+  ~LibraryManager() = default;
+
+  bool addLibrary(std::string Path, PathType Kind,
+                  std::optional<BloomFilter> Filter = std::nullopt) {
+    std::unique_lock<std::shared_mutex> Lock(Mtx);
+    if (Libraries.count(Path) > 0)
+      return false;
+    Libraries.insert({std::move(Path),
+                      std::make_shared<LibraryInfo>(Path, LibState::Unloaded,
+                                                    Kind, std::move(Filter))});
+    return true;
+  }
+
+  bool hasLibrary(StringRef Path) const {
+    std::shared_lock<std::shared_mutex> Lock(Mtx);
+    if (Libraries.count(Path) > 0)
+      return true;
+    return false;
+  }
+
+  bool removeLibrary(StringRef Path) {
+    std::unique_lock<std::shared_mutex> Lock(Mtx);
+    auto I = Libraries.find(Path);
+    if (I == Libraries.end())
+      return false;
+    Libraries.erase(I);
+    return true;
+  }
+
+  void markLoaded(StringRef Path) {
+    std::unique_lock<std::shared_mutex> Lock(Mtx);
+    if (auto It = Libraries.find(Path); It != Libraries.end())
+      It->second->setState(LibState::Loaded);
+  }
+
+  void markQueried(StringRef Path) {
+    std::unique_lock<std::shared_mutex> Lock(Mtx);
+    if (auto It = Libraries.find(Path); It != Libraries.end())
+      It->second->setState(LibState::Queried);
+  }
+
+  std::shared_ptr<LibraryInfo> getLibrary(StringRef Path) {
+    std::shared_lock<std::shared_mutex> Lock(Mtx);
+    if (auto It = Libraries.find(Path); It != Libraries.end())
+      return It->second;
+    return nullptr;
+  }
+
+  FilteredView getView(LibState S, PathType K) const {
+    std::shared_lock<std::shared_mutex> Lock(Mtx);
+    return FilteredView(Libraries.begin(), Libraries.end(), S, K);
+  }
+
+  void forEachLibrary(const LibraryVisitor &visitor) const {
+    std::unique_lock<std::shared_mutex> Lock(Mtx);
+    for (const auto &[_, entry] : Libraries) {
+      if (!visitor(*entry))
+        break;
+    }
+  }
+
+  bool isLoaded(StringRef Path) const {
+    std::unique_lock<std::shared_mutex> Lock(Mtx);
+    if (auto It = Libraries.find(Path.str()); It != Libraries.end())
+      return It->second->getState() == LibState::Loaded;
+    return false;
+  }
+
+  bool isQueried(StringRef Path) const {
+    std::unique_lock<std::shared_mutex> Lock(Mtx);
+    if (auto It = Libraries.find(Path.str()); It != Libraries.end())
+      return It->second->getState() == LibState::Queried;
+    return false;
+  }
+
+  void clear() {
+    std::unique_lock<std::shared_mutex> Lock(Mtx);
+    Libraries.clear();
+  }
+};
+
+using LibraryInfo = LibraryManager::LibraryInfo;
+
+struct SearchPlanEntry {
+  LibraryManager::LibState State; // Loaded, Queried, Unloaded
+  PathType Type;                  // User, System
+};
+
+struct SearchPolicy {
+  std::vector<SearchPlanEntry> Plan;
+
+  static SearchPolicy defaultPlan() {
+    return {{{LibraryManager::LibState::Loaded, PathType::User},
+             {LibraryManager::LibState::Queried, PathType::User},
+             {LibraryManager::LibState::Unloaded, PathType::User},
+             {LibraryManager::LibState::Loaded, PathType::System},
+             {LibraryManager::LibState::Queried, PathType::System},
+             {LibraryManager::LibState::Unloaded, PathType::System}}};
+  }
+};
+
+struct SymbolEnumeratorOptions {
+  enum Filter : uint32_t {
+    None = 0,
+    IgnoreUndefined = 1 << 0,
+    IgnoreWeak = 1 << 1,
+    IgnoreIndirect = 1 << 2,
+    IgnoreHidden = 1 << 3,
+    IgnoreNonGlobal = 1 << 4
+  };
+
+  static SymbolEnumeratorOptions defaultOptions() {
+    return {Filter::IgnoreUndefined | Filter::IgnoreWeak |
+            Filter::IgnoreIndirect};
+  }
+  uint32_t FilterFlags = Filter::None;
+};
+
+struct SearchConfig {
+  SearchPolicy Policy;
+  SymbolEnumeratorOptions Options;
+
+  SearchConfig()
+      : Policy(SearchPolicy::defaultPlan()), // default plan
+        Options(SymbolEnumeratorOptions::defaultOptions()) {}
+};
+
+/// Scans libraries and resolves Symbols across user and system paths.
+///
+/// Supports symbol enumeration and filtering via SymbolEnumerator, and tracks
+/// symbol resolution results through SymbolQuery. Thread-safe and uses
+/// LibraryScanHelper for efficient path resolution and caching.
+class LibraryResolver {
+  friend class LibraryResolutionDriver;
+
+public:
+  class SymbolEnumerator {
+  public:
+    enum class EnumerateResult { Continue, Stop, Error };
+
+    using OnEachSymbolFn = std::function<EnumerateResult(StringRef Sym)>;
+
+    static bool enumerateSymbols(StringRef Path, OnEachSymbolFn OnEach,
+                                 const SymbolEnumeratorOptions &Opts);
+  };
+
+  /// Tracks a set of symbols and the libraries where they are resolved.
+  ///
+  /// SymbolQuery is used to keep track of which symbols have been resolved
+  /// to which libraries. It supports concurrent read/write access using a
+  /// shared mutex, allowing multiple readers or a single writer at a time.
+  class SymbolQuery {
+  public:
+    /// Holds the result for a single symbol.
+    struct Result {
+      std::string Name;
+      std::string ResolvedLibPath;
+    };
+
+  private:
+    mutable std::shared_mutex Mtx;
+    StringMap<Result> Results;
+    std::atomic<size_t> ResolvedCount = 0;
+
+  public:
+    explicit SymbolQuery(const std::vector<std::string> &Symbols) {
+      for (const auto &s : Symbols) {
+        if (!Results.contains(s))
+          Results.insert({s, Result{s, ""}});
+      }
+    }
+
+    SmallVector<StringRef> getUnresolvedSymbols() const {
+      SmallVector<StringRef> Unresolved;
+      std::shared_lock<std::shared_mutex> Lock(Mtx);
+      for (const auto &[name, res] : Results) {
+        if (res.ResolvedLibPath.empty())
+          Unresolved.push_back(name);
+      }
+      return Unresolved;
+    }
+
+    void resolve(StringRef Sym, const std::string &LibPath) {
+      std::unique_lock<std::shared_mutex> Lock(Mtx);
+      auto It = Results.find(Sym);
+      if (It != Results.end() && It->second.ResolvedLibPath.empty()) {
+        It->second.ResolvedLibPath = LibPath;
+        ResolvedCount.fetch_add(1, std::memory_order_relaxed);
+      }
+    }
+
+    bool allResolved() const {
+      return ResolvedCount.load(std::memory_order_relaxed) == Results.size();
+    }
+
+    bool hasUnresolved() const {
+      return ResolvedCount.load(std::memory_order_relaxed) < Results.size();
+    }
+
+    std::optional<StringRef> getResolvedLib(StringRef Sym) const {
+      std::shared_lock<std::shared_mutex> Lock(Mtx);
+      auto It = Results.find(Sym);
+      if (It != Results.end() && !It->second.ResolvedLibPath.empty())
+        return StringRef(It->second.ResolvedLibPath);
+      return std::nullopt;
+    }
+
+    bool isResolved(StringRef Sym) const {
+      std::shared_lock<std::shared_mutex> Lock(Mtx);
+      auto It = Results.find(Sym.str());
+      return It != Results.end() && !It->second.ResolvedLibPath.empty();
+    }
+
+    std::vector<const Result *> getAllResults() const {
+      std::shared_lock<std::shared_mutex> Lock(Mtx);
+      std::vector<const Result *> Out;
+      Out.reserve(Results.size());
+      for (const auto &[_, res] : Results)
+        Out.push_back(&res);
+      return Out;
+    }
+  };
+
+  struct Setup {
+    std::vector<std::string> BasePaths;
+    std::shared_ptr<LibraryPathCache> Cache;
+    std::shared_ptr<PathResolver> PResolver;
+
+    size_t ScanBatchSize = 0;
+
+    LibraryScanner::ShouldScanFn ShouldScanCall = [](StringRef) {
+      return true;
+    };
+
+    BloomFilterBuilder FilterBuilder = BloomFilterBuilder();
+
+    static Setup
+    create(std::vector<std::string> BasePaths,
+           std::shared_ptr<LibraryPathCache> existingCache = nullptr,
+           std::shared_ptr<PathResolver> existingResolver = nullptr,
+           LibraryScanner::ShouldScanFn customShouldScan = nullptr) {
+      Setup S;
+      S.BasePaths = std::move(BasePaths);
+
+      S.Cache =
+          existingCache ? existingCache : std::make_shared<LibraryPathCache>();
+
+      S.PResolver = existingResolver ? existingResolver
+                                     : std::make_shared<PathResolver>(S.Cache);
+
+      if (customShouldScan)
+        S.ShouldScanCall = std::move(customShouldScan);
+
+      return S;
+    }
+  };
+
+  LibraryResolver() = delete;
+  explicit LibraryResolver(const Setup &S);
+  ~LibraryResolver() = default;
+
+  using OnSearchComplete = unique_function<void(SymbolQuery &)>;
+
+  void dump() {
+    int i = 0;
+    LibMgr.forEachLibrary([&](const LibraryInfo &Lib) -> bool {
+      dbgs() << ++i << ". Library Path : " << Lib.getFullPath() << " -> \n\t\t:"
+             << " ({Type : ("
+             << (Lib.getKind() == PathType::User ? "User" : "System")
+             << ") }, { State : "
+             << (Lib.getState() == LibraryManager::LibState::Loaded
+                     ? "Loaded"
+                     : "Unloaded")
+             << "})\n";
+      return true;
+    });
+  }
+
+  void searchSymbolsInLibraries(std::vector<std::string> &SymList,
+                                OnSearchComplete OnComplete,
+                                const SearchConfig &Config = SearchConfig());
+
+private:
+  bool scanLibrariesIfNeeded(PathType K, size_t BatchSize = 0);
+  void resolveSymbolsInLibrary(LibraryInfo &Lib, SymbolQuery &Q,
+                               const SymbolEnumeratorOptions &Opts);
+  bool
+  symbolExistsInLibrary(const LibraryInfo &Lib, StringRef Sym,
+                        std::vector<std::string> *MatchedSymbols = nullptr);
+
+  bool symbolExistsInLibrary(const LibraryInfo &Lib, StringRef SymName,
+                             std::vector<std::string> *AllSymbols,
+                             const SymbolEnumeratorOptions &Opts);
+
+  std::shared_ptr<LibraryPathCache> LibPathCache;
+  std::shared_ptr<PathResolver> LibPathResolver;
+  LibraryScanHelper ScanHelper;
+  BloomFilterBuilder FB;
+  LibraryManager LibMgr;
+  LibraryScanner::ShouldScanFn ShouldScanCall;
+  size_t scanBatchSize;
+};
+
+using SymbolEnumerator = LibraryResolver::SymbolEnumerator;
+using SymbolQuery = LibraryResolver::SymbolQuery;
+using EnumerateResult = SymbolEnumerator::EnumerateResult;
+
+class LibraryResolutionDriver {
+public:
+  static std::unique_ptr<LibraryResolutionDriver>
+  create(const LibraryResolver::Setup &S);
+
+  void addScanPath(const std::string &Path, PathType Kind);
+  bool markLibraryLoaded(StringRef Path);
+  bool markLibraryUnLoaded(StringRef Path);
+  bool isLibraryLoaded(StringRef Path) const {
+    return LR->LibMgr.isLoaded(Path);
+  }
+
+  void resetAll() {
+    LR->LibMgr.clear();
+    LR->ScanHelper.resetToScan();
+    LR->LibPathCache->clear();
+  }
+
+  void scanAll(size_t BatchSize = 0) {
+    LR->scanLibrariesIfNeeded(PathType::User, BatchSize);
+    LR->scanLibrariesIfNeeded(PathType::System, BatchSize);
+  }
+
+  void scan(PathType PK, size_t BatchSize = 0) {
+    LR->scanLibrariesIfNeeded(PK, BatchSize);
+  }
+
+  void resolveSymbols(std::vector<std::string> Symbols,
+                      LibraryResolver::OnSearchComplete OnCompletion,
+                      const SearchConfig &Config = SearchConfig());
+
+  ~LibraryResolutionDriver() = default;
+
+private:
+  LibraryResolutionDriver(std::unique_ptr<LibraryResolver> L)
+      : LR(std::move(L)) {}
+
+  std::unique_ptr<LibraryResolver> LR;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_LIBRARYRESOLVER_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h
new file mode 100644
index 0000000..d1c2013
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h
@@ -0,0 +1,474 @@
+//===- LibraryScanner.h - Scanner for Shared Libraries ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides functionality for scanning dynamic (shared) libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_LIBRARYSCANNER_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_LIBRARYSCANNER_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/StringSaver.h"
+
+#include <atomic>
+#include <mutex>
+#include <queue>
+#include <shared_mutex>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace llvm {
+namespace orc {
+
+class LibraryManager;
+
+class LibraryPathCache {
+  friend class PathResolver;
+
+public:
+  LibraryPathCache() = default;
+
+  void clear(bool isRealPathCache = false) {
+    std::unique_lock<std::shared_mutex> lock(Mtx);
+    Seen.clear();
+    if (isRealPathCache) {
+      RealPathCache.clear();
+#ifndef _WIN32
+      ReadlinkCache.clear();
+      LstatCache.clear();
+#endif
+    }
+  }
+
+  void markSeen(const std::string &CanonPath) {
+    std::unique_lock<std::shared_mutex> lock(Mtx);
+    Seen.insert(CanonPath);
+  }
+
+  bool hasSeen(StringRef CanonPath) const {
+    std::shared_lock<std::shared_mutex> lock(Mtx);
+    return Seen.contains(CanonPath);
+  }
+
+  bool hasSeenOrMark(StringRef CanonPath) {
+    std::string s = CanonPath.str();
+    {
+      std::shared_lock<std::shared_mutex> lock(Mtx);
+      if (Seen.contains(s))
+        return true;
+    }
+    {
+      std::unique_lock<std::shared_mutex> lock(Mtx);
+      Seen.insert(s);
+    }
+    return false;
+  }
+
+private:
+  mutable std::shared_mutex Mtx;
+
+  struct PathInfo {
+    std::string canonicalPath;
+    std::error_code ErrnoCode;
+  };
+
+  void insert_realpath(StringRef Path, const PathInfo &Info) {
+    std::unique_lock<std::shared_mutex> lock(Mtx);
+    RealPathCache.insert({Path, Info});
+  }
+
+  std::optional<PathInfo> read_realpath(StringRef Path) const {
+    std::shared_lock<std::shared_mutex> lock(Mtx);
+    auto It = RealPathCache.find(Path);
+    if (It != RealPathCache.end())
+      return It->second;
+
+    return std::nullopt;
+  }
+
+  StringSet<> Seen;
+  StringMap<PathInfo> RealPathCache;
+
+#ifndef _WIN32
+  StringMap<std::string> ReadlinkCache;
+  StringMap<mode_t> LstatCache;
+
+  void insert_link(StringRef Path, const std::string &s) {
+    std::unique_lock<std::shared_mutex> lock(Mtx);
+    ReadlinkCache.insert({Path, s});
+  }
+
+  std::optional<std::string> read_link(StringRef Path) const {
+    std::shared_lock<std::shared_mutex> lock(Mtx);
+    auto It = ReadlinkCache.find(Path);
+    if (It != ReadlinkCache.end())
+      return It->second;
+
+    return std::nullopt;
+  }
+
+  void insert_lstat(StringRef Path, mode_t m) {
+    std::unique_lock<std::shared_mutex> lock(Mtx);
+    LstatCache.insert({Path, m});
+  }
+
+  std::optional<mode_t> read_lstat(StringRef Path) const {
+    std::shared_lock<std::shared_mutex> lock(Mtx);
+    auto It = LstatCache.find(Path);
+    if (It != LstatCache.end())
+      return It->second;
+
+    return std::nullopt;
+  }
+
+#endif
+};
+
+/// Resolves file system paths with optional caching of results.
+///
+/// Supports lstat, readlink, and realpath operations. Can resolve paths
+/// relative to a base and handle symbolic links. Caches results to reduce
+/// repeated system calls when enabled.
+class PathResolver {
+private:
+  std::shared_ptr<LibraryPathCache> LibPathCache;
+
+public:
+  PathResolver(std::shared_ptr<LibraryPathCache> cache)
+      : LibPathCache(std::move(cache)) {}
+
+  std::optional<std::string> resolve(StringRef Path, std::error_code &ec) {
+    return realpathCached(Path, ec);
+  }
+#ifndef _WIN32
+  mode_t lstatCached(StringRef Path);
+  std::optional<std::string> readlinkCached(StringRef Path);
+#endif
+  std::optional<std::string> realpathCached(StringRef Path, std::error_code &ec,
+                                            StringRef base = "",
+                                            bool baseIsResolved = false,
+                                            long symloopLevel = 40);
+};
+
+/// Performs placeholder substitution in dynamic library paths.
+///
+/// Configures known placeholders (like @loader_path) and replaces them
+/// in input paths with their resolved values.
+class DylibSubstitutor {
+public:
+  void configure(StringRef loaderPath);
+
+  std::string substitute(StringRef input) const {
+    for (const auto &[ph, value] : Placeholders) {
+      if (input.starts_with_insensitive(ph))
+        return (Twine(value) + input.drop_front(ph.size())).str();
+    }
+    return input.str();
+  }
+
+private:
+  StringMap<std::string> Placeholders;
+};
+
+/// Validates and normalizes dynamic library paths.
+///
+/// Uses a `PathResolver` to resolve paths to their canonical form and
+/// checks whether they point to valid shared libraries.
+class DylibPathValidator {
+public:
+  DylibPathValidator(PathResolver &PR) : LibPathResolver(PR) {}
+
+  static bool isSharedLibrary(StringRef Path);
+
+  std::optional<std::string> normalize(StringRef Path) const {
+    std::error_code ec;
+    auto real = LibPathResolver.resolve(Path, ec);
+    if (!real || ec)
+      return std::nullopt;
+
+    return real;
+  }
+
+  /// Validate the given path as a shared library.
+  std::optional<std::string> validate(StringRef Path) const {
+    auto realOpt = normalize(Path);
+    if (!realOpt)
+      return std::nullopt;
+
+    if (!isSharedLibrary(*realOpt))
+      return std::nullopt;
+
+    return realOpt;
+  }
+
+private:
+  PathResolver &LibPathResolver;
+};
+
+enum class SearchPathType {
+  RPath,
+  UsrOrSys,
+  RunPath,
+};
+
+struct SearchPathConfig {
+  ArrayRef<StringRef> Paths;
+  SearchPathType type;
+};
+
+class SearchPathResolver {
+public:
+  SearchPathResolver(const SearchPathConfig &Cfg,
+                     StringRef PlaceholderPrefix = "")
+      : Kind(Cfg.type), PlaceholderPrefix(PlaceholderPrefix) {
+    for (auto &path : Cfg.Paths)
+      Paths.emplace_back(path.str());
+  }
+
+  std::optional<std::string> resolve(StringRef libStem,
+                                     const DylibSubstitutor &Subst,
+                                     DylibPathValidator &Validator) const;
+  SearchPathType searchPathType() const { return Kind; }
+
+private:
+  std::vector<std::string> Paths;
+  SearchPathType Kind;
+  std::string PlaceholderPrefix;
+};
+
+class DylibResolverImpl {
+public:
+  DylibResolverImpl(DylibSubstitutor Substitutor, DylibPathValidator &Validator,
+                    std::vector<SearchPathResolver> Resolvers)
+      : Substitutor(std::move(Substitutor)), Validator(Validator),
+        Resolvers(std::move(Resolvers)) {}
+
+  std::optional<std::string> resolve(StringRef Stem,
+                                     bool VariateLibStem = false) const;
+
+private:
+  std::optional<std::string> tryWithExtensions(StringRef libstem) const;
+
+  DylibSubstitutor Substitutor;
+  DylibPathValidator &Validator;
+  std::vector<SearchPathResolver> Resolvers;
+};
+
+class DylibResolver {
+public:
+  DylibResolver(DylibPathValidator &Validator) : Validator(Validator) {}
+
+  void configure(StringRef loaderPath,
+                 ArrayRef<SearchPathConfig> SearchPathCfg) {
+    DylibSubstitutor Substitutor;
+    Substitutor.configure(loaderPath);
+
+    std::vector<SearchPathResolver> Resolvers;
+    for (const auto &cfg : SearchPathCfg) {
+      Resolvers.emplace_back(cfg,
+                             cfg.type == SearchPathType::RPath ? "@rpath" : "");
+    }
+
+    impl_ = std::make_unique<DylibResolverImpl>(
+        std::move(Substitutor), Validator, std::move(Resolvers));
+  }
+
+  std::optional<std::string> resolve(StringRef libStem,
+                                     bool VariateLibStem = false) const {
+    if (!impl_)
+      return std::nullopt;
+    return impl_->resolve(libStem, VariateLibStem);
+  }
+
+  static std::string resolvelinkerFlag(StringRef libStem,
+                                       StringRef loaderPath) {
+    DylibSubstitutor Substitutor;
+    Substitutor.configure(loaderPath);
+    return Substitutor.substitute(libStem);
+  }
+
+private:
+  DylibPathValidator &Validator;
+  std::unique_ptr<DylibResolverImpl> impl_;
+};
+
+enum class PathType : uint8_t { User, System, Unknown };
+
+enum class ScanState : uint8_t { NotScanned, Scanning, Scanned };
+
+struct LibrarySearchPath {
+  std::string BasePath; // Canonical base directory path
+  PathType Kind;        // User or System
+  std::atomic<ScanState> State;
+
+  LibrarySearchPath(std::string Base, PathType K)
+      : BasePath(std::move(Base)), Kind(K), State(ScanState::NotScanned) {}
+};
+
+/// Scans and tracks libraries for symbol resolution.
+///
+/// Maintains a list of library paths to scan, caches scanned units,
+/// and resolves paths canonically for consistent tracking.
+class LibraryScanHelper {
+public:
+  explicit LibraryScanHelper(const std::vector<std::string> &SPaths,
+                             std::shared_ptr<LibraryPathCache> LibPathCache,
+                             std::shared_ptr<PathResolver> LibPathResolver)
+      : LibPathCache(std::move(LibPathCache)),
+        LibPathResolver(std::move(LibPathResolver)) {
+    DEBUG_WITH_TYPE(
+        "orc", dbgs() << "LibraryScanHelper::LibraryScanHelper: base paths : "
+                      << SPaths.size() << "\n";);
+    for (const auto &p : SPaths)
+      addBasePath(p);
+  }
+
+  void
+  addBasePath(const std::string &P,
+              PathType Kind =
+                  PathType::Unknown); // Add a canonical directory for scanning
+  std::vector<std::shared_ptr<LibrarySearchPath>>
+  getNextBatch(PathType Kind, size_t batchSize);
+
+  bool leftToScan(PathType K) const;
+  void resetToScan();
+
+  bool isTrackedBasePath(StringRef P) const;
+  std::vector<std::shared_ptr<LibrarySearchPath>> getAllUnits() const;
+
+  SmallVector<StringRef> getSearchPaths() const {
+    SmallVector<StringRef> SearchPaths;
+    for (const auto &[_, SP] : LibSearchPaths)
+      SearchPaths.push_back(SP->BasePath);
+    return SearchPaths;
+  }
+
+  PathResolver &getPathResolver() const { return *LibPathResolver; }
+
+  LibraryPathCache &getCache() const { return *LibPathCache; }
+
+  bool hasSeenOrMark(StringRef P) const {
+    return LibPathCache->hasSeenOrMark(P);
+  }
+
+  std::optional<std::string> resolve(StringRef P, std::error_code &ec) const {
+    return LibPathResolver->resolve(P.str(), ec);
+  }
+
+private:
+  std::string resolveCanonical(StringRef P, std::error_code &ec) const;
+  PathType classifyKind(StringRef P) const;
+
+  mutable std::shared_mutex Mtx;
+  std::shared_ptr<LibraryPathCache> LibPathCache;
+  std::shared_ptr<PathResolver> LibPathResolver;
+
+  StringMap<std::shared_ptr<LibrarySearchPath>>
+      LibSearchPaths; // key: canonical path
+  std::deque<StringRef> UnscannedUsr;
+  std::deque<StringRef> UnscannedSys;
+};
+
+/// Loads an object file and provides access to it.
+///
+/// Owns the underlying `ObjectFile` and ensures it is valid.
+/// Any errors encountered during construction are stored and
+/// returned when attempting to access the file.
+class ObjectFileLoader {
+public:
+  /// Construct an object file loader from the given path.
+  explicit ObjectFileLoader(StringRef Path) {
+    auto ObjOrErr = loadObjectFileWithOwnership(Path);
+    if (ObjOrErr)
+      Obj = std::move(*ObjOrErr);
+    else {
+      consumeError(std::move(Err));
+      Err = ObjOrErr.takeError();
+    }
+  }
+
+  ObjectFileLoader(const ObjectFileLoader &) = delete;
+  ObjectFileLoader &operator=(const ObjectFileLoader &) = delete;
+
+  ObjectFileLoader(ObjectFileLoader &&) = default;
+  ObjectFileLoader &operator=(ObjectFileLoader &&) = default;
+
+  /// Get the loaded object file, or return an error if loading failed.
+  Expected<object::ObjectFile &> getObjectFile() {
+    if (Err)
+      return std::move(Err);
+    return *Obj.getBinary();
+  }
+
+  static bool isArchitectureCompatible(const object::ObjectFile &Obj);
+
+private:
+  object::OwningBinary<object::ObjectFile> Obj;
+  Error Err = Error::success();
+
+  static Expected<object::OwningBinary<object::ObjectFile>>
+  loadObjectFileWithOwnership(StringRef FilePath);
+};
+
+/// Scans libraries, resolves dependencies, and registers them.
+class LibraryScanner {
+public:
+  using ShouldScanFn = std::function<bool(StringRef)>;
+
+  LibraryScanner(
+      LibraryScanHelper &H, LibraryManager &LibMgr,
+      ShouldScanFn ShouldScanCall = [](StringRef path) { return true; })
+      : ScanHelper(H), LibMgr(LibMgr),
+        ShouldScanCall(std::move(ShouldScanCall)) {}
+
+  void scanNext(PathType Kind, size_t batchSize = 1);
+
+  /// Dependency info for a library.
+  struct LibraryDepsInfo {
+    llvm::BumpPtrAllocator Alloc;
+    llvm::StringSaver Saver{Alloc};
+
+    SmallVector<StringRef, 2> rpath;
+    SmallVector<StringRef, 2> runPath;
+    SmallVector<StringRef, 4> deps;
+    bool isPIE = false;
+
+    void addRPath(StringRef s) { rpath.push_back(Saver.save(s)); }
+
+    void addRunPath(StringRef s) { runPath.push_back(Saver.save(s)); }
+
+    void addDep(StringRef s) { deps.push_back(Saver.save(s)); }
+  };
+
+private:
+  LibraryScanHelper &ScanHelper;
+  LibraryManager &LibMgr;
+  ShouldScanFn ShouldScanCall;
+
+  std::optional<std::string> shouldScan(StringRef FilePath);
+  Expected<LibraryDepsInfo> extractDeps(StringRef FilePath);
+
+  void handleLibrary(StringRef P, PathType K, int level = 1);
+
+  void scanBaseDir(std::shared_ptr<LibrarySearchPath> U);
+};
+
+using LibraryDepsInfo = LibraryScanner::LibraryDepsInfo;
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_LIBRARYSCANNER_H
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 99f70b1..e3ec7e1 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -198,6 +198,26 @@ struct constantexpr_match {
 /// expression.
 inline constantexpr_match m_ConstantExpr() { return constantexpr_match(); }
 
+template <typename SubPattern_t> struct Splat_match {
+  SubPattern_t SubPattern;
+  Splat_match(const SubPattern_t &SP) : SubPattern(SP) {}
+
+  template <typename OpTy> bool match(OpTy *V) const {
+    if (auto *C = dyn_cast<Constant>(V)) {
+      auto *Splat = C->getSplatValue();
+      return Splat ? SubPattern.match(Splat) : false;
+    }
+    // TODO: Extend to other cases (e.g. shufflevectors).
+    return false;
+  }
+};
+
+/// Match a constant splat. TODO: Extend this to non-constant splats.
+template <typename T>
+inline Splat_match<T> m_ConstantSplat(const T &SubPattern) {
+  return SubPattern;
+}
+
 /// Match an arbitrary basic block value and ignore it.
 inline class_match<BasicBlock> m_BasicBlock() {
   return class_match<BasicBlock>();
@@ -2925,6 +2945,12 @@ inline typename m_Intrinsic_Ty<Opnd0>::Ty m_VecReverse(const Opnd0 &Op0) {
   return m_Intrinsic<Intrinsic::vector_reverse>(Op0);
 }
 
+template <typename Opnd0, typename Opnd1, typename Opnd2>
+inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty
+m_VectorInsert(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) {
+  return m_Intrinsic<Intrinsic::vector_insert>(Op0, Op1, Op2);
+}
+
 //===----------------------------------------------------------------------===//
 // Matchers for two-operands operators with the operators in either order
 //
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 3dc9055..ec16995 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -32,6 +32,15 @@ def isWindowsMSVCEnvironment : RuntimeLibcallPredicate<
 def isNotOSLinuxAndNotOSOpenBSD : RuntimeLibcallPredicate<
   [{!TT.isOSLinux() && !TT.isOSOpenBSD()}]>;
 
+def isNotOSAIXAndNotOSOpenBSD : RuntimeLibcallPredicate<
+  [{!TT.isOSAIX() && !TT.isOSOpenBSD()}]>;
+
+// OpenBSD uses __guard_local. AIX uses __ssp_canary_word, MSVC/Windows
+// Itanium uses __security_cookie
+def hasStackChkFail : RuntimeLibcallPredicate<
+  [{ !TT.isOSOpenBSD() && !TT.isWindowsMSVCEnvironment() &&
+     !TT.isWindowsItaniumEnvironment()}]>;
+
 def isWindowsMSVCOrItaniumEnvironment : RuntimeLibcallPredicate<
   [{TT.isWindowsMSVCEnvironment() || TT.isWindowsItaniumEnvironment()}]>;
 
@@ -1241,9 +1250,9 @@ defvar LibmHasLdexpF80 = LibcallImpls<(add ldexpl_f80), isNotOSWindowsOrIsCygwin
 defvar LibmHasFrexpF128 = LibcallImpls<(add frexpl_f128), isNotOSWindowsOrIsCygwinMinGW>;
 defvar LibmHasLdexpF128 = LibcallImpls<(add ldexpl_f128), isNotOSWindowsOrIsCygwinMinGW>;
 
-defvar has__stack_chk_fail = LibcallImpls<(add __stack_chk_fail), isNotOSOpenBSD>;
+defvar has__stack_chk_fail = LibcallImpls<(add __stack_chk_fail), hasStackChkFail>;
 defvar has__stack_chk_guard =
-    LibcallImpls<(add __stack_chk_guard), isNotOSOpenBSD>;
+    LibcallImpls<(add __stack_chk_guard), hasStackChkFail>;
 defvar has__stack_smash_handler = LibcallImpls<(add __stack_smash_handler), isOSOpenBSD>;
 defvar has___guard_local = LibcallImpls<(add __guard_local), isOSOpenBSD>;
 
@@ -1396,8 +1405,8 @@ defvar ExceptionModelCallsArm64EC = (add
 def WindowsARM64ECSystemLibrary
     : SystemRuntimeLibrary<isWindowsArm64EC,
                            (add WinArm64ECDefaultRuntimeLibcallImpls,
-                                arm64ec___stack_chk_fail,
-                                __stack_chk_guard,
+                                AvailableIf<arm64ec___stack_chk_fail, hasStackChkFail>,
+                                AvailableIf<__stack_chk_guard, hasStackChkFail>,
                                 LibcallImpls<(add __security_check_cookie_arm64ec,
                                                   __security_cookie),
                                               isWindowsMSVCEnvironment>,
@@ -2318,11 +2327,11 @@ def PPCSystemLibrary
            LibmHasSinCosPPCF128,
            AvailableIf<memcpy, isNotAIX>,
            LibcallImpls<(add Int128RTLibcalls), isPPC64>,
-           has__stack_chk_fail,
            has__stack_smash_handler,
            has___guard_local,
            AvailableIf<__ssp_canary_word, isAIX>,
-           AvailableIf<__stack_chk_guard, isNotAIX>)>;
+           AvailableIf<__stack_chk_fail, isNotOSOpenBSD>,
+           AvailableIf<__stack_chk_guard, isNotOSAIXAndNotOSOpenBSD>)>;
 
 //===----------------------------------------------------------------------===//
 // RISCV Runtime Libcalls
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index a837cdd..3a4dc5a 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -462,6 +462,19 @@ private:
     ModuleMapType ModuleMap;
     // The bitcode modules to compile, if specified by the LTO Config.
     std::optional<ModuleMapType> ModulesToCompile;
+
+    void setPrevailingModuleForGUID(GlobalValue::GUID GUID, StringRef Module) {
+      PrevailingModuleForGUID[GUID] = Module;
+    }
+    bool isPrevailingModuleForGUID(GlobalValue::GUID GUID,
+                                   StringRef Module) const {
+      auto It = PrevailingModuleForGUID.find(GUID);
+      return It != PrevailingModuleForGUID.end() && It->second == Module;
+    }
+
+  private:
+    // Make this private so all accesses must go through above accessor methods
+    // to avoid inadvertently creating new entries on lookups.
     DenseMap<GlobalValue::GUID, StringRef> PrevailingModuleForGUID;
   } ThinLTO;
 
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 3d21f52..47d5d68 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -484,13 +484,6 @@ def propagate_undef_shuffle_mask: GICombineRule<
          [{ return Helper.matchUndefShuffleVectorMask(*${root}); }]),
   (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
 
-// Replace a G_SHUFFLE_VECTOR with a G_EXTRACT_VECTOR_ELT.
-def shuffle_to_extract: GICombineRule<
-  (defs root:$root),
-  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
-         [{ return Helper.matchShuffleToExtract(*${root}); }]),
-  (apply [{ Helper.applyShuffleToExtract(*${root}); }])>;
-
   // Replace an insert/extract element of an out of bounds index with undef.
   def insert_extract_vec_elt_out_of_bounds : GICombineRule<
   (defs root:$root),
@@ -1674,8 +1667,7 @@ def combine_shuffle_concat : GICombineRule<
 // Combines shuffles of vector into build_vector
 def combine_shuffle_vector_to_build_vector : GICombineRule<
   (defs root:$root),
-  (match (G_SHUFFLE_VECTOR $dst, $src1, $src2, $mask):$root,
-    [{ return Helper.matchCombineShuffleToBuildVector(*${root}); }]),
+  (match (G_SHUFFLE_VECTOR $dst, $src1, $src2, $mask):$root),
   (apply [{ Helper.applyCombineShuffleToBuildVector(*${root}); }])>;
 
 def insert_vector_element_idx_undef : GICombineRule<
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index 92a5b6f..b09f4ed 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -241,9 +241,13 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
       ColdBytes += TotalSize;
       // If we have the max cold context size from summary information and have
       // requested identification of contexts above a percentage of the max, see
-      // if this context qualifies.
-      if (MaxColdSize > 0 && MinPercentMaxColdSize < 100 &&
-          TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize)
+      // if this context qualifies. We should assume this is large if we rebuilt
+      // the trie from existing metadata (i.e. to update after inlining), in
+      // which case we don't have a MaxSize from the profile - we assume any
+      // context size info in existence on the metadata should be propagated.
+      if (BuiltFromExistingMetadata ||
+          (MaxColdSize > 0 && MinPercentMaxColdSize < 100 &&
+           TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize))
         LargeColdContext = true;
     }
     // Only add the context size info as metadata if we need it in the thin
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b425b95..1f10478 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -391,19 +391,6 @@ void CombinerHelper::applyCombineConcatVectors(
   MI.eraseFromParent();
 }
 
-bool CombinerHelper::matchCombineShuffleToBuildVector(MachineInstr &MI) const {
-  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
-         "Invalid instruction");
-  auto &Shuffle = cast<GShuffleVector>(MI);
-
-  Register SrcVec1 = Shuffle.getSrc1Reg();
-  Register SrcVec2 = Shuffle.getSrc2Reg();
-
-  LLT SrcVec1Type = MRI.getType(SrcVec1);
-  LLT SrcVec2Type = MRI.getType(SrcVec2);
-  return SrcVec1Type.isVector() && SrcVec2Type.isVector();
-}
-
 void CombinerHelper::applyCombineShuffleToBuildVector(MachineInstr &MI) const {
   auto &Shuffle = cast<GShuffleVector>(MI);
 
@@ -535,11 +522,9 @@ bool CombinerHelper::matchCombineShuffleVector(
   LLT DstType = MRI.getType(MI.getOperand(0).getReg());
   Register Src1 = MI.getOperand(1).getReg();
   LLT SrcType = MRI.getType(Src1);
-  // As bizarre as it may look, shuffle vector can actually produce
-  // scalar! This is because at the IR level a <1 x ty> shuffle
-  // vector is perfectly valid.
-  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
-  unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
+
+  unsigned DstNumElts = DstType.getNumElements();
+  unsigned SrcNumElts = SrcType.getNumElements();
 
   // If the resulting vector is smaller than the size of the source
   // vectors being concatenated, we won't be able to replace the
@@ -556,7 +541,7 @@ bool CombinerHelper::matchCombineShuffleVector(
   //
   // TODO: If the size between the source and destination don't match
   //       we could still emit an extract vector element in that case.
-  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
+  if (DstNumElts < 2 * SrcNumElts)
     return false;
 
   // Check that the shuffle mask can be broken evenly between the
@@ -619,39 +604,6 @@ void CombinerHelper::applyCombineShuffleVector(
   MI.eraseFromParent();
 }
 
-bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) const {
-  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
-         "Invalid instruction kind");
-
-  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
-  return Mask.size() == 1;
-}
-
-void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) const {
-  Register DstReg = MI.getOperand(0).getReg();
-  Builder.setInsertPt(*MI.getParent(), MI);
-
-  int I = MI.getOperand(3).getShuffleMask()[0];
-  Register Src1 = MI.getOperand(1).getReg();
-  LLT Src1Ty = MRI.getType(Src1);
-  int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
-  Register SrcReg;
-  if (I >= Src1NumElts) {
-    SrcReg = MI.getOperand(2).getReg();
-    I -= Src1NumElts;
-  } else if (I >= 0)
-    SrcReg = Src1;
-
-  if (I < 0)
-    Builder.buildUndef(DstReg);
-  else if (!MRI.getType(SrcReg).isVector())
-    Builder.buildCopy(DstReg, SrcReg);
-  else
-    Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);
-
-  MI.eraseFromParent();
-}
-
 namespace {
 
 /// Select a preference between two uses. CurrentUse is the current preference
@@ -8369,7 +8321,7 @@ bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
     return false;
 
   ArrayRef<int> Mask = Shuffle.getMask();
-  const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
+  const unsigned NumSrcElems = Src1Ty.getNumElements();
 
   bool TouchesSrc1 = false;
   bool TouchesSrc2 = false;
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 04d9309..d6f23b6 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -602,6 +602,8 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
                          Depth + 1);
     computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                          Depth + 1);
+    OffsetKnown = OffsetKnown.sext(BitWidth);
+    WidthKnown = WidthKnown.sext(BitWidth);
     Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
     // Sign extend the extracted value using shift left and arithmetic shift
     // right.
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b49040b..1fc90d0 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3359,6 +3359,54 @@ bool IRTranslator::translateShuffleVector(const User &U,
     Mask = SVI->getShuffleMask();
   else
     Mask = cast<ConstantExpr>(U).getShuffleMask();
+
+  // As GISel does not represent <1 x > vectors as a separate type from scalars,
+  // we transform shuffle_vector with a scalar output to an
+  // ExtractVectorElement. If the input type is also scalar it becomes a Copy.
+  unsigned DstElts = cast<FixedVectorType>(U.getType())->getNumElements();
+  unsigned SrcElts =
+      cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements();
+  if (DstElts == 1) {
+    unsigned M = Mask[0];
+    if (SrcElts == 1) {
+      if (M == 0 || M == 1)
+        return translateCopy(U, *U.getOperand(M), MIRBuilder);
+      MIRBuilder.buildUndef(getOrCreateVReg(U));
+    } else {
+      Register Dst = getOrCreateVReg(U);
+      if (M < SrcElts) {
+        MIRBuilder.buildExtractVectorElementConstant(
+            Dst, getOrCreateVReg(*U.getOperand(0)), M);
+      } else if (M < SrcElts * 2) {
+        MIRBuilder.buildExtractVectorElementConstant(
+            Dst, getOrCreateVReg(*U.getOperand(1)), M - SrcElts);
+      } else {
+        MIRBuilder.buildUndef(Dst);
+      }
+    }
+    return true;
+  }
+
+  // A single element src is transformed to a build_vector.
+  if (SrcElts == 1) {
+    SmallVector<Register> Ops;
+    Register Undef;
+    for (int M : Mask) {
+      LLT SrcTy = getLLTForType(*U.getOperand(0)->getType(), *DL);
+      if (M == 0 || M == 1) {
+        Ops.push_back(getOrCreateVReg(*U.getOperand(M)));
+      } else {
+        if (!Undef.isValid()) {
+          Undef = MRI->createGenericVirtualRegister(SrcTy);
+          MIRBuilder.buildUndef(Undef);
+        }
+        Ops.push_back(Undef);
+      }
+    }
+    MIRBuilder.buildBuildVector(getOrCreateVReg(U), Ops);
+    return true;
+  }
+
   ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);
   MIRBuilder
       .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)},
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 38ec83f..f9d27b0 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5819,6 +5819,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
     } else if (InputUsed[0] == -1U) {
       // No input vectors were used! The result is undefined.
       Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
+    } else if (NewElts == 1) {
+      Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
     } else {
       Register Op0 = Inputs[InputUsed[0]];
       // If only one input was used, use an undefined vector for the other.
@@ -9016,22 +9018,18 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
       continue;
     }
 
-    if (Src0Ty.isScalar()) {
-      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
-    } else {
-      int NumElts = Src0Ty.getNumElements();
-      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
-      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
-      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
-      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
-      BuildVec.push_back(Extract.getReg(0));
-    }
+    assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
+
+    int NumElts = Src0Ty.getNumElements();
+    Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
+    int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
+    auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
+    auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
+    BuildVec.push_back(Extract.getReg(0));
   }
 
-  if (DstTy.isVector())
-    MIRBuilder.buildBuildVector(DstReg, BuildVec);
-  else
-    MIRBuilder.buildCopy(DstReg, BuildVec[0]);
+  assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
+  MIRBuilder.buildBuildVector(DstReg, BuildVec);
   MI.eraseFromParent();
   return Legalized;
 }
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 27df7e3..4b4df98 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -800,10 +800,11 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
   LLT DstTy = Res.getLLTTy(*getMRI());
   LLT Src1Ty = Src1.getLLTTy(*getMRI());
   LLT Src2Ty = Src2.getLLTTy(*getMRI());
-  const LLT DstElemTy = DstTy.isVector() ? DstTy.getElementType() : DstTy;
-  const LLT ElemTy1 = Src1Ty.isVector() ? Src1Ty.getElementType() : Src1Ty;
-  const LLT ElemTy2 = Src2Ty.isVector() ? Src2Ty.getElementType() : Src2Ty;
+  const LLT DstElemTy = DstTy.getScalarType();
+  const LLT ElemTy1 = Src1Ty.getScalarType();
+  const LLT ElemTy2 = Src2Ty.getScalarType();
   assert(DstElemTy == ElemTy1 && DstElemTy == ElemTy2);
+  assert(Mask.size() > 1 && "Scalar G_SHUFFLE_VECTOR are not supported");
   (void)DstElemTy;
   (void)ElemTy1;
   (void)ElemTy2;
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 6a464d9..4795d81 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -2788,6 +2788,9 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) {
   if (expectAndConsume(MIToken::rparen))
     return error("shufflemask should be terminated by ')'.");
 
+  if (ShufMask.size() < 2)
+    return error("shufflemask should have > 1 element");
+
   ArrayRef<int> MaskAlloc = MF.allocateShuffleMask(ShufMask);
   Dest = MachineOperand::CreateShuffleMask(MaskAlloc);
   return false;
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 1154855..c0710c4 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1924,13 +1924,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
     if (Src0Ty != Src1Ty)
       report("Source operands must be the same type", MI);
 
-    if (Src0Ty.getScalarType() != DstTy.getScalarType())
+    if (Src0Ty.getScalarType() != DstTy.getScalarType()) {
       report("G_SHUFFLE_VECTOR cannot change element type", MI);
+      break;
+    }
+    if (!Src0Ty.isVector()) {
+      report("G_SHUFFLE_VECTOR must have vector src", MI);
+      break;
+    }
+    if (!DstTy.isVector()) {
+      report("G_SHUFFLE_VECTOR must have vector dst", MI);
+      break;
+    }
 
     // Don't check that all operands are vector because scalars are used in
     // place of 1 element vectors.
-    int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1;
-    int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
+    int SrcNumElts = Src0Ty.getNumElements();
+    int DstNumElts = DstTy.getNumElements();
 
     ArrayRef<int> MaskIdxes = MaskOp.getShuffleMask();
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d2ea652..8676060 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19993,8 +19993,12 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
   //    nor a successor of N. Otherwise, if Op is folded that would
   //    create a cycle.
   unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
-  for (SDNode *Op : Ptr->users()) {
+  for (SDUse &U : Ptr->uses()) {
+    if (U.getResNo() != Ptr.getResNo())
+      continue;
+
     // Check for #1.
+    SDNode *Op = U.getUser();
     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
       continue;
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index bfa566a..dee0909 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1162,6 +1162,43 @@ SDValue SelectionDAGBuilder::getMemoryRoot() {
   return updateRoot(PendingLoads);
 }
 
+SDValue SelectionDAGBuilder::getFPOperationRoot(fp::ExceptionBehavior EB) {
+  // If the new exception behavior differs from that of the pending
+  // ones, chain up them and update the root.
+  switch (EB) {
+  case fp::ExceptionBehavior::ebMayTrap:
+  case fp::ExceptionBehavior::ebIgnore:
+    // Floating-point exceptions produced by such operations are not intended
+    // to be observed, so the sequence of these operations does not need to be
+    // preserved.
+    //
+    // They however must not be mixed with the instructions that have strict
+    // exception behavior. Placing an operation with 'ebIgnore' behavior between
+    // 'ebStrict' operations could distort the observed exception behavior.
+    if (!PendingConstrainedFPStrict.empty()) {
+      assert(PendingConstrainedFP.empty());
+      updateRoot(PendingConstrainedFPStrict);
+    }
+    break;
+  case fp::ExceptionBehavior::ebStrict:
+    // Floating-point exception produced by these operations may be observed, so
+    // they must be correctly chained. If trapping on FP exceptions is
+    // disabled, the exceptions can be observed only by functions that read
+    // exception flags, like 'llvm.get_fpenv' or 'fetestexcept'. It means that
+    // the order of operations is not significant between barriers.
+    //
+    // If trapping is enabled, each operation becomes an implicit observation
+    // point, so the operations must be sequenced according their original
+    // source order.
+    if (!PendingConstrainedFP.empty()) {
+      assert(PendingConstrainedFPStrict.empty());
+      updateRoot(PendingConstrainedFP);
+    }
+    // TODO: Add support for trapping-enabled scenarios.
+  }
+  return DAG.getRoot();
+}
+
 SDValue SelectionDAGBuilder::getRoot() {
   // Chain up all pending constrained intrinsics together with all
   // pending loads, by simply appending them to PendingLoads and
@@ -8298,6 +8335,30 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
   }
 }
 
+void SelectionDAGBuilder::pushFPOpOutChain(SDValue Result,
+                                           fp::ExceptionBehavior EB) {
+  assert(Result.getNode()->getNumValues() == 2);
+  SDValue OutChain = Result.getValue(1);
+  assert(OutChain.getValueType() == MVT::Other);
+
+  // Instead of updating the root immediately, push the produced chain to the
+  // appropriate list, deferring the update until the root is requested. In this
+  // case, the nodes from the lists are chained using TokenFactor, indicating
+  // that the operations are independent.
+  //
+  // In particular, the root is updated before any call that might access the
+  // floating-point environment, except for constrained intrinsics.
+  switch (EB) {
+  case fp::ExceptionBehavior::ebMayTrap:
+  case fp::ExceptionBehavior::ebIgnore:
+    PendingConstrainedFP.push_back(OutChain);
+    break;
+  case fp::ExceptionBehavior::ebStrict:
+    PendingConstrainedFPStrict.push_back(OutChain);
+    break;
+  }
+}
+
 void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
     const ConstrainedFPIntrinsic &FPI) {
   SDLoc sdl = getCurSDLoc();
@@ -8305,42 +8366,16 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
   // We do not need to serialize constrained FP intrinsics against
   // each other or against (nonvolatile) loads, so they can be
   // chained like loads.
-  SDValue Chain = DAG.getRoot();
+  fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
+  SDValue Chain = getFPOperationRoot(EB);
   SmallVector<SDValue, 4> Opers;
   Opers.push_back(Chain);
   for (unsigned I = 0, E = FPI.getNonMetadataArgCount(); I != E; ++I)
     Opers.push_back(getValue(FPI.getArgOperand(I)));
 
-  auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
-    assert(Result.getNode()->getNumValues() == 2);
-
-    // Push node to the appropriate list so that future instructions can be
-    // chained up correctly.
-    SDValue OutChain = Result.getValue(1);
-    switch (EB) {
-    case fp::ExceptionBehavior::ebIgnore:
-      // The only reason why ebIgnore nodes still need to be chained is that
-      // they might depend on the current rounding mode, and therefore must
-      // not be moved across instruction that may change that mode.
-      [[fallthrough]];
-    case fp::ExceptionBehavior::ebMayTrap:
-      // These must not be moved across calls or instructions that may change
-      // floating-point exception masks.
-      PendingConstrainedFP.push_back(OutChain);
-      break;
-    case fp::ExceptionBehavior::ebStrict:
-      // These must not be moved across calls or instructions that may change
-      // floating-point exception masks or read floating-point exception flags.
-      // In addition, they cannot be optimized out even if unused.
-      PendingConstrainedFPStrict.push_back(OutChain);
-      break;
-    }
-  };
-
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), FPI.getType());
   SDVTList VTs = DAG.getVTList(VT, MVT::Other);
-  fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
 
   SDNodeFlags Flags;
   if (EB == fp::ExceptionBehavior::ebIgnore)
@@ -8364,7 +8399,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
         !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
       Opers.pop_back();
       SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
-      pushOutChain(Mul, EB);
+      pushFPOpOutChain(Mul, EB);
       Opcode = ISD::STRICT_FADD;
       Opers.clear();
       Opers.push_back(Mul.getValue(1));
@@ -8395,7 +8430,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
   }
 
   SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags);
-  pushOutChain(Result, EB);
+  pushFPOpOutChain(Result, EB);
 
   SDValue FPResult = Result.getValue(0);
   setValue(&FPI, FPResult);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index c7577fa..47e19f7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -195,6 +195,11 @@ private:
   /// Update root to include all chains from the Pending list.
   SDValue updateRoot(SmallVectorImpl<SDValue> &Pending);
 
+  /// Given a node representing a floating-point operation and its specified
+  /// exception behavior, this either updates the root or stores the node in
+  /// a list to be added to chains latter.
+  void pushFPOpOutChain(SDValue Result, fp::ExceptionBehavior EB);
+
   /// A unique monotonically increasing number used to order the SDNodes we
   /// create.
   unsigned SDNodeOrder;
@@ -300,6 +305,13 @@ public:
   /// memory node that may need to be ordered after any prior load instructions.
   SDValue getMemoryRoot();
 
+  /// Return the current virtual root of the Selection DAG, flushing
+  /// PendingConstrainedFP or PendingConstrainedFPStrict items if the new
+  /// exception behavior (specified by \p EB) differs from that of the pending
+  /// instructions. This must be done before emitting constrained FP operation
+  /// call.
+  SDValue getFPOperationRoot(fp::ExceptionBehavior EB);
+
   /// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict)
   /// items. This must be done before emitting any call other any other node
   /// that may need to be ordered after FP instructions due to other side
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 060b1dd..59798b3 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -2097,6 +2097,11 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
 }
 
 Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
+  // MSVC CRT has a function to validate security cookie.
+  RTLIB::LibcallImpl SecurityCheckCookieLibcall =
+      getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
+  if (SecurityCheckCookieLibcall != RTLIB::Unsupported)
+    return M.getFunction(getLibcallImplName(SecurityCheckCookieLibcall));
   return nullptr;
 }
 
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
index 9275586..ca8192b 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
@@ -16,9 +16,11 @@ add_llvm_component_library(LLVMOrcTargetProcess
   ExecutorSharedMemoryMapperService.cpp
   DefaultHostBootstrapValues.cpp
   ExecutorResolver.cpp
+  LibraryResolver.cpp
   JITLoaderGDB.cpp
   JITLoaderPerf.cpp
   JITLoaderVTune.cpp
+  LibraryScanner.cpp
   OrcRTBootstrap.cpp
   RegisterEHFrames.cpp
   SimpleExecutorDylibManager.cpp
@@ -36,6 +38,8 @@ add_llvm_component_library(LLVMOrcTargetProcess
 
   LINK_COMPONENTS
   ${intel_jit_profiling}
+  BinaryFormat
+  Object
   OrcShared
   Support
   TargetParser
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp
new file mode 100644
index 0000000..9d25b74
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp
@@ -0,0 +1,369 @@
+//===- LibraryResolver.cpp - Library Resolution of Unresolved Symbols ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Library resolution impl for unresolved symbols
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h"
+
+#include "llvm/ADT/StringSet.h"
+
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+
+#include <mutex>
+#include <thread>
+
+#define DEBUG_TYPE "orc-resolver"
+
+namespace llvm::orc {
+
+LibraryResolver::LibraryResolver(const LibraryResolver::Setup &S)
+    : LibPathCache(S.Cache ? S.Cache : std::make_shared<LibraryPathCache>()),
+      LibPathResolver(S.PResolver
+                          ? S.PResolver
+                          : std::make_shared<PathResolver>(LibPathCache)),
+      ScanHelper(S.BasePaths, LibPathCache, LibPathResolver),
+      FB(S.FilterBuilder), LibMgr(),
+      ShouldScanCall(S.ShouldScanCall ? S.ShouldScanCall
+                                      : [](StringRef) -> bool { return true; }),
+      scanBatchSize(S.ScanBatchSize) {
+
+  if (ScanHelper.getAllUnits().empty()) {
+    LLVM_DEBUG(dbgs() << "Warning: No base paths provided for scanning.\n");
+  }
+}
+
+std::unique_ptr<LibraryResolutionDriver>
+LibraryResolutionDriver::create(const LibraryResolver::Setup &S) {
+  auto LR = std::make_unique<LibraryResolver>(S);
+  return std::unique_ptr<LibraryResolutionDriver>(
+      new LibraryResolutionDriver(std::move(LR)));
+}
+
+void LibraryResolutionDriver::addScanPath(const std::string &Path, PathType K) {
+  LR->ScanHelper.addBasePath(Path, K);
+}
+
+bool LibraryResolutionDriver::markLibraryLoaded(StringRef Path) {
+  auto Lib = LR->LibMgr.getLibrary(Path);
+  if (!Lib)
+    return false;
+
+  Lib->setState(LibraryManager::LibState::Loaded);
+
+  return true;
+}
+
+bool LibraryResolutionDriver::markLibraryUnLoaded(StringRef Path) {
+  auto Lib = LR->LibMgr.getLibrary(Path);
+  if (!Lib)
+    return false;
+
+  Lib->setState(LibraryManager::LibState::Unloaded);
+
+  return true;
+}
+
+void LibraryResolutionDriver::resolveSymbols(
+    std::vector<std::string> Syms,
+    LibraryResolver::OnSearchComplete OnCompletion,
+    const SearchConfig &Config) {
+  LR->searchSymbolsInLibraries(Syms, std::move(OnCompletion), Config);
+}
+
+static bool shouldIgnoreSymbol(const object::SymbolRef &Sym,
+                               uint32_t IgnoreFlags) {
+  Expected<uint32_t> FlagsOrErr = Sym.getFlags();
+  if (!FlagsOrErr) {
+    consumeError(FlagsOrErr.takeError());
+    return true;
+  }
+
+  uint32_t Flags = *FlagsOrErr;
+
+  using Filter = SymbolEnumeratorOptions;
+  if ((IgnoreFlags & Filter::IgnoreUndefined) &&
+      (Flags & object::SymbolRef::SF_Undefined))
+    return true;
+  if ((IgnoreFlags & Filter::IgnoreIndirect) &&
+      (Flags & object::SymbolRef::SF_Indirect))
+    return true;
+  if ((IgnoreFlags & Filter::IgnoreWeak) &&
+      (Flags & object::SymbolRef::SF_Weak))
+    return true;
+
+  return false;
+}
+
+bool SymbolEnumerator::enumerateSymbols(StringRef Path, OnEachSymbolFn OnEach,
+                                        const SymbolEnumeratorOptions &Opts) {
+  if (Path.empty())
+    return false;
+
+  ObjectFileLoader ObjLoader(Path);
+
+  auto ObjOrErr = ObjLoader.getObjectFile();
+  if (!ObjOrErr) {
+    std::string ErrMsg;
+    handleAllErrors(ObjOrErr.takeError(),
+                    [&](const ErrorInfoBase &EIB) { ErrMsg = EIB.message(); });
+    LLVM_DEBUG(dbgs() << "Failed loading object file: " << Path
+                      << "\nError: " << ErrMsg << "\n");
+    return false;
+  }
+
+  object::ObjectFile *Obj = &ObjOrErr.get();
+
+  auto processSymbolRange =
+      [&](object::ObjectFile::symbol_iterator_range Range) -> EnumerateResult {
+    for (const auto &Sym : Range) {
+      if (shouldIgnoreSymbol(Sym, Opts.FilterFlags))
+        continue;
+
+      auto NameOrErr = Sym.getName();
+      if (!NameOrErr) {
+        consumeError(NameOrErr.takeError());
+        continue;
+      }
+
+      StringRef Name = *NameOrErr;
+      if (Name.empty())
+        continue;
+
+      EnumerateResult Res = OnEach(Name);
+      if (Res != EnumerateResult::Continue)
+        return Res;
+    }
+    return EnumerateResult::Continue;
+  };
+
+  EnumerateResult Res = processSymbolRange(Obj->symbols());
+  if (Res != EnumerateResult::Continue)
+    return Res == EnumerateResult::Stop;
+
+  if (Obj->isELF()) {
+    const auto *ElfObj = cast<object::ELFObjectFileBase>(Obj);
+    Res = processSymbolRange(ElfObj->getDynamicSymbolIterators());
+    if (Res != EnumerateResult::Continue)
+      return Res == EnumerateResult::Stop;
+  } else if (Obj->isCOFF()) {
+    const auto *CoffObj = cast<object::COFFObjectFile>(Obj);
+    for (auto I = CoffObj->export_directory_begin(),
+              E = CoffObj->export_directory_end();
+         I != E; ++I) {
+      StringRef Name;
+      if (I->getSymbolName(Name))
+        continue;
+      if (Name.empty())
+        continue;
+
+      if (OnEach(Name) != EnumerateResult::Continue)
+        return false;
+    }
+  } else if (Obj->isMachO()) {
+  }
+
+  return true;
+}
+
+class SymbolSearchContext {
+public:
+  SymbolSearchContext(SymbolQuery &Q) : Q(Q) {}
+
+  bool hasSearched(LibraryInfo *Lib) const { return Searched.count(Lib); }
+
+  void markSearched(LibraryInfo *Lib) { Searched.insert(Lib); }
+
+  inline bool allResolved() const { return Q.allResolved(); }
+
+  SymbolQuery &query() { return Q; }
+
+private:
+  SymbolQuery &Q;
+  DenseSet<LibraryInfo *> Searched;
+};
+
+void LibraryResolver::resolveSymbolsInLibrary(
+    LibraryInfo &Lib, SymbolQuery &UnresolvedSymbols,
+    const SymbolEnumeratorOptions &Opts) {
+  LLVM_DEBUG(dbgs() << "Checking unresolved symbols "
+                    << " in library : " << Lib.getFileName() << "\n";);
+  StringSet<> DiscoveredSymbols;
+
+  if (!UnresolvedSymbols.hasUnresolved()) {
+    LLVM_DEBUG(dbgs() << "Skipping library: " << Lib.getFullPath()
+                      << " — unresolved symbols exist.\n";);
+    return;
+  }
+
+  bool HasEnumerated = false;
+  auto enumerateSymbolsIfNeeded = [&]() {
+    if (HasEnumerated)
+      return;
+
+    HasEnumerated = true;
+
+    LLVM_DEBUG(dbgs() << "Enumerating symbols in library: " << Lib.getFullPath()
+                      << "\n";);
+    SymbolEnumerator::enumerateSymbols(
+        Lib.getFullPath(),
+        [&](StringRef sym) {
+          DiscoveredSymbols.insert(sym);
+          return EnumerateResult::Continue;
+        },
+        Opts);
+
+    if (DiscoveredSymbols.empty()) {
+      LLVM_DEBUG(dbgs() << "  No symbols and remove library : "
+                        << Lib.getFullPath() << "\n";);
+      LibMgr.removeLibrary(Lib.getFullPath());
+      return;
+    }
+  };
+
+  if (!Lib.hasFilter()) {
+    LLVM_DEBUG(dbgs() << "Building filter for library: " << Lib.getFullPath()
+                      << "\n";);
+    enumerateSymbolsIfNeeded();
+    SmallVector<StringRef> SymbolVec;
+    SymbolVec.reserve(DiscoveredSymbols.size());
+    for (const auto &KV : DiscoveredSymbols)
+      SymbolVec.push_back(KV.first());
+
+    Lib.ensureFilterBuilt(FB, SymbolVec);
+    LLVM_DEBUG({
+      dbgs() << "DiscoveredSymbols : " << DiscoveredSymbols.size() << "\n";
+      for (const auto &KV : DiscoveredSymbols)
+        dbgs() << "DiscoveredSymbols : " << KV.first() << "\n";
+    });
+  }
+
+  const auto &Unresolved = UnresolvedSymbols.getUnresolvedSymbols();
+  bool HadAnySym = false;
+  LLVM_DEBUG(dbgs() << "Total unresolved symbols : " << Unresolved.size()
+                    << "\n";);
+  for (const auto &Sym : Unresolved) {
+    if (Lib.mayContain(Sym)) {
+      LLVM_DEBUG(dbgs() << "Checking symbol '" << Sym
+                        << "' in library: " << Lib.getFullPath() << "\n";);
+      enumerateSymbolsIfNeeded();
+      if (DiscoveredSymbols.count(Sym) > 0) {
+        LLVM_DEBUG(dbgs() << "  Resolved symbol: " << Sym
+                          << " in library: " << Lib.getFullPath() << "\n";);
+        UnresolvedSymbols.resolve(Sym, Lib.getFullPath());
+        HadAnySym = true;
+      }
+    }
+  }
+
+  using LibraryState = LibraryManager::LibState;
+  if (HadAnySym && Lib.getState() != LibraryState::Loaded)
+    Lib.setState(LibraryState::Queried);
+}
+
+void LibraryResolver::searchSymbolsInLibraries(
+    std::vector<std::string> &SymbolList, OnSearchComplete OnComplete,
+    const SearchConfig &Config) {
+  SymbolQuery Q(SymbolList);
+
+  using LibraryState = LibraryManager::LibState;
+  using LibraryType = PathType;
+  auto tryResolveFrom = [&](LibraryState S, LibraryType K) {
+    LLVM_DEBUG(dbgs() << "Trying resolve from state=" << static_cast<int>(S)
+                      << " type=" << static_cast<int>(K) << "\n";);
+
+    SymbolSearchContext Ctx(Q);
+    while (!Ctx.allResolved()) {
+
+      for (auto &Lib : LibMgr.getView(S, K)) {
+        if (Ctx.hasSearched(Lib.get()))
+          continue;
+
+        // can use Async here?
+        resolveSymbolsInLibrary(*Lib, Ctx.query(), Config.Options);
+        Ctx.markSearched(Lib.get());
+
+        if (Ctx.allResolved())
+          return;
+      }
+
+      if (Ctx.allResolved())
+        return;
+
+      if (!scanLibrariesIfNeeded(K, scanBatchSize))
+        break; // no more new libs to scan
+    }
+  };
+
+  for (const auto &[St, Ty] : Config.Policy.Plan) {
+    tryResolveFrom(St, Ty);
+    if (Q.allResolved())
+      break;
+  }
+
+  // done:
+  LLVM_DEBUG({
+    dbgs() << "Search complete.\n";
+    for (const auto &r : Q.getAllResults())
+      dbgs() << "Resolved Symbol:" << r->Name << " -> " << r->ResolvedLibPath
+             << "\n";
+  });
+
+  OnComplete(Q);
+}
+
+bool LibraryResolver::scanLibrariesIfNeeded(PathType PK, size_t BatchSize) {
+  LLVM_DEBUG(dbgs() << "LibraryResolver::scanLibrariesIfNeeded: Scanning for "
+                    << (PK == PathType::User ? "User" : "System")
+                    << " libraries\n";);
+  if (!ScanHelper.leftToScan(PK))
+    return false;
+
+  LibraryScanner Scanner(ScanHelper, LibMgr, ShouldScanCall);
+  Scanner.scanNext(PK, BatchSize);
+  return true;
+}
+
+bool LibraryResolver::symbolExistsInLibrary(const LibraryInfo &Lib,
+                                            StringRef SymName,
+                                            std::vector<std::string> *AllSyms) {
+  SymbolEnumeratorOptions Opts;
+  return symbolExistsInLibrary(Lib, SymName, AllSyms, Opts);
+}
+
+bool LibraryResolver::symbolExistsInLibrary(
+    const LibraryInfo &Lib, StringRef SymName,
+    std::vector<std::string> *AllSyms, const SymbolEnumeratorOptions &Opts) {
+  bool Found = false;
+
+  SymbolEnumerator::enumerateSymbols(
+      Lib.getFullPath(),
+      [&](StringRef Sym) {
+        if (AllSyms)
+          AllSyms->emplace_back(Sym.str());
+
+        if (Sym == SymName) {
+          Found = true;
+        }
+
+        return EnumerateResult::Continue;
+      },
+      Opts);
+
+  return Found;
+}
+
+} // end namespace llvm::orc
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp
new file mode 100644
index 0000000..f1e8b5d
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp
@@ -0,0 +1,1161 @@
+//===- LibraryScanner.cpp - Provide Library Scanning Implementation ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h"
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/Triple.h"
+
+#ifdef LLVM_ON_UNIX
+#include <sys/stat.h>
+#include <unistd.h>
+#endif // LLVM_ON_UNIX
+
+#ifdef __APPLE__
+#include <sys/stat.h>
+#undef LC_LOAD_DYLIB
+#undef LC_RPATH
+#endif // __APPLE__
+
+#define DEBUG_TYPE "orc-scanner"
+
+namespace llvm::orc {
+
+void handleError(Error Err, StringRef context = "") {
+  consumeError(handleErrors(std::move(Err), [&](const ErrorInfoBase &EIB) {
+    dbgs() << "LLVM Error";
+    if (!context.empty())
+      dbgs() << " [" << context << "]";
+    dbgs() << ": " << EIB.message() << "\n";
+  }));
+}
+
+bool ObjectFileLoader::isArchitectureCompatible(const object::ObjectFile &Obj) {
+  Triple HostTriple(sys::getDefaultTargetTriple());
+  Triple ObjTriple = Obj.makeTriple();
+
+  LLVM_DEBUG({
+    dbgs() << "Host triple: " << HostTriple.str()
+           << ", Object triple: " << ObjTriple.str() << "\n";
+  });
+
+  if (ObjTriple.getArch() != Triple::UnknownArch &&
+      HostTriple.getArch() != ObjTriple.getArch())
+    return false;
+
+  if (ObjTriple.getOS() != Triple::UnknownOS &&
+      HostTriple.getOS() != ObjTriple.getOS())
+    return false;
+
+  if (ObjTriple.getEnvironment() != Triple::UnknownEnvironment &&
+      HostTriple.getEnvironment() != Triple::UnknownEnvironment &&
+      HostTriple.getEnvironment() != ObjTriple.getEnvironment())
+    return false;
+
+  return true;
+}
+
+Expected<object::OwningBinary<object::ObjectFile>>
+ObjectFileLoader::loadObjectFileWithOwnership(StringRef FilePath) {
+  LLVM_DEBUG(dbgs() << "ObjectFileLoader: Attempting to open file " << FilePath
+                    << "\n";);
+  auto BinOrErr = object::createBinary(FilePath);
+  if (!BinOrErr) {
+    LLVM_DEBUG(dbgs() << "ObjectFileLoader: Failed to open file " << FilePath
+                      << "\n";);
+    return BinOrErr.takeError();
+  }
+
+  LLVM_DEBUG(dbgs() << "ObjectFileLoader: Successfully opened file " << FilePath
+                    << "\n";);
+
+  auto OwningBin = BinOrErr->takeBinary();
+  object::Binary *Bin = OwningBin.first.get();
+
+  if (Bin->isArchive()) {
+    LLVM_DEBUG(dbgs() << "ObjectFileLoader: File is an archive, not supported: "
+                      << FilePath << "\n";);
+    return createStringError(std::errc::invalid_argument,
+                             "Archive files are not supported: %s",
+                             FilePath.str().c_str());
+  }
+
+#if defined(__APPLE__)
+  if (auto *UB = dyn_cast<object::MachOUniversalBinary>(Bin)) {
+    LLVM_DEBUG(dbgs() << "ObjectFileLoader: Detected Mach-O universal binary: "
+                      << FilePath << "\n";);
+    for (auto ObjForArch : UB->objects()) {
+      auto ObjOrErr = ObjForArch.getAsObjectFile();
+      if (!ObjOrErr) {
+        LLVM_DEBUG(
+            dbgs()
+                << "ObjectFileLoader: Skipping invalid architecture slice\n";);
+
+        consumeError(ObjOrErr.takeError());
+        continue;
+      }
+
+      std::unique_ptr<object::ObjectFile> Obj = std::move(ObjOrErr.get());
+      if (isArchitectureCompatible(*Obj)) {
+        LLVM_DEBUG(
+            dbgs() << "ObjectFileLoader: Found compatible object slice\n";);
+
+        return object::OwningBinary<object::ObjectFile>(
+            std::move(Obj), std::move(OwningBin.second));
+
+      } else {
+        LLVM_DEBUG(dbgs() << "ObjectFileLoader: Incompatible architecture "
+                             "slice skipped\n";);
+      }
+    }
+    LLVM_DEBUG(dbgs() << "ObjectFileLoader: No compatible slices found in "
+                         "universal binary\n";);
+    return createStringError(inconvertibleErrorCode(),
+                             "No compatible object found in fat binary: %s",
+                             FilePath.str().c_str());
+  }
+#endif
+
+  auto ObjOrErr =
+      object::ObjectFile::createObjectFile(Bin->getMemoryBufferRef());
+  if (!ObjOrErr) {
+    LLVM_DEBUG(dbgs() << "ObjectFileLoader: Failed to create object file\n";);
+    return ObjOrErr.takeError();
+  }
+  LLVM_DEBUG(dbgs() << "ObjectFileLoader: Detected object file\n";);
+
+  std::unique_ptr<object::ObjectFile> Obj = std::move(*ObjOrErr);
+  if (!isArchitectureCompatible(*Obj)) {
+    LLVM_DEBUG(dbgs() << "ObjectFileLoader: Incompatible architecture: "
+                      << FilePath << "\n";);
+    return createStringError(inconvertibleErrorCode(),
+                             "Incompatible object file: %s",
+                             FilePath.str().c_str());
+  }
+
+  LLVM_DEBUG(dbgs() << "ObjectFileLoader: Object file is compatible\n";);
+
+  return object::OwningBinary<object::ObjectFile>(std::move(Obj),
+                                                  std::move(OwningBin.second));
+}
+
+template <class ELFT>
+bool isELFSharedLibrary(const object::ELFFile<ELFT> &ELFObj) {
+  if (ELFObj.getHeader().e_type != ELF::ET_DYN)
+    return false;
+
+  auto PHOrErr = ELFObj.program_headers();
+  if (!PHOrErr) {
+    consumeError(PHOrErr.takeError());
+    return true;
+  }
+
+  for (auto Phdr : *PHOrErr) {
+    if (Phdr.p_type == ELF::PT_INTERP)
+      return false;
+  }
+
+  return true;
+}
+
+bool isSharedLibraryObject(object::ObjectFile &Obj) {
+  if (Obj.isELF()) {
+    if (auto *ELF32LE = dyn_cast<object::ELF32LEObjectFile>(&Obj))
+      return isELFSharedLibrary(ELF32LE->getELFFile());
+    if (auto *ELF64LE = dyn_cast<object::ELF64LEObjectFile>(&Obj))
+      return isELFSharedLibrary(ELF64LE->getELFFile());
+    if (auto *ELF32BE = dyn_cast<object::ELF32BEObjectFile>(&Obj))
+      return isELFSharedLibrary(ELF32BE->getELFFile());
+    if (auto *ELF64BE = dyn_cast<object::ELF64BEObjectFile>(&Obj))
+      return isELFSharedLibrary(ELF64BE->getELFFile());
+  } else if (Obj.isMachO()) {
+    const object::MachOObjectFile *MachO =
+        dyn_cast<object::MachOObjectFile>(&Obj);
+    if (!MachO) {
+      LLVM_DEBUG(dbgs() << "Failed to cast to MachOObjectFile.\n";);
+      return false;
+    }
+    LLVM_DEBUG({
+      bool Result =
+          MachO->getHeader().filetype == MachO::HeaderFileType::MH_DYLIB;
+      dbgs() << "Mach-O filetype: " << MachO->getHeader().filetype
+             << " (MH_DYLIB == " << MachO::HeaderFileType::MH_DYLIB
+             << "), shared: " << Result << "\n";
+    });
+
+    return MachO->getHeader().filetype == MachO::HeaderFileType::MH_DYLIB;
+  } else if (Obj.isCOFF()) {
+    const object::COFFObjectFile *coff = dyn_cast<object::COFFObjectFile>(&Obj);
+    if (!coff)
+      return false;
+    return coff->getCharacteristics() & COFF::IMAGE_FILE_DLL;
+  } else {
+    LLVM_DEBUG(dbgs() << "Binary is not an ObjectFile.\n";);
+  }
+
+  return false;
+}
+
+bool DylibPathValidator::isSharedLibrary(StringRef Path) {
+  LLVM_DEBUG(dbgs() << "Checking if path is a shared library: " << Path
+                    << "\n";);
+
+  auto FileType = sys::fs::get_file_type(Path, /*Follow*/ true);
+  if (FileType != sys::fs::file_type::regular_file) {
+    LLVM_DEBUG(dbgs() << "File type is not a regular file for path: " << Path
+                      << "\n";);
+    return false;
+  }
+
+  file_magic MagicCode;
+  identify_magic(Path, MagicCode);
+
+  // Skip archives.
+  if (MagicCode == file_magic::archive)
+    return false;
+
+  // Universal binary handling.
+#if defined(__APPLE__)
+  if (MagicCode == file_magic::macho_universal_binary) {
+    ObjectFileLoader ObjLoader(Path);
+    auto ObjOrErr = ObjLoader.getObjectFile();
+    if (!ObjOrErr) {
+      consumeError(ObjOrErr.takeError());
+      return false;
+    }
+    return isSharedLibraryObject(ObjOrErr.get());
+  }
+#endif
+
+  // Object file inspection for PE/COFF, ELF, and Mach-O
+  bool NeedsObjectInspection =
+#if defined(_WIN32)
+      (MagicCode == file_magic::pecoff_executable);
+#elif defined(__APPLE__)
+      (MagicCode == file_magic::macho_fixed_virtual_memory_shared_lib ||
+       MagicCode == file_magic::macho_dynamically_linked_shared_lib ||
+       MagicCode == file_magic::macho_dynamically_linked_shared_lib_stub);
+#elif defined(LLVM_ON_UNIX)
+#ifdef __CYGWIN__
+      (MagicCode == file_magic::pecoff_executable);
+#else
+      (MagicCode == file_magic::elf_shared_object);
+#endif
+#else
+#error "Unsupported platform."
+#endif
+
+  if (NeedsObjectInspection) {
+    ObjectFileLoader ObjLoader(Path);
+    auto ObjOrErr = ObjLoader.getObjectFile();
+    if (!ObjOrErr) {
+      consumeError(ObjOrErr.takeError());
+      return false;
+    }
+    return isSharedLibraryObject(ObjOrErr.get());
+  }
+
+  LLVM_DEBUG(dbgs() << "Path is not identified as a shared library: " << Path
+                    << "\n";);
+  return false;
+}
+
+void DylibSubstitutor::configure(StringRef LoaderPath) {
+  SmallString<512> ExecPath(sys::fs::getMainExecutable(nullptr, nullptr));
+  sys::path::remove_filename(ExecPath);
+
+  SmallString<512> LoaderDir;
+  if (LoaderPath.empty()) {
+    LoaderDir = ExecPath;
+  } else {
+    LoaderDir = LoaderPath.str();
+    if (!sys::fs::is_directory(LoaderPath))
+      sys::path::remove_filename(LoaderDir);
+  }
+
+#ifdef __APPLE__
+  Placeholders["@loader_path"] = std::string(LoaderDir);
+  Placeholders["@executable_path"] = std::string(ExecPath);
+#else
+  Placeholders["$origin"] = std::string(LoaderDir);
+#endif
+}
+
+std::optional<std::string>
+SearchPathResolver::resolve(StringRef Stem, const DylibSubstitutor &Subst,
+                            DylibPathValidator &Validator) const {
+  for (const auto &SP : Paths) {
+    std::string Base = Subst.substitute(SP);
+
+    SmallString<512> FullPath(Base);
+    if (!PlaceholderPrefix.empty() &&
+        Stem.starts_with_insensitive(PlaceholderPrefix))
+      FullPath.append(Stem.drop_front(PlaceholderPrefix.size()));
+    else
+      sys::path::append(FullPath, Stem);
+
+    LLVM_DEBUG(dbgs() << "SearchPathResolver::resolve FullPath = " << FullPath
+                      << "\n";);
+
+    if (auto Valid = Validator.validate(FullPath.str()))
+      return Valid;
+  }
+
+  return std::nullopt;
+}
+
+std::optional<std::string>
+DylibResolverImpl::tryWithExtensions(StringRef LibStem) const {
+  LLVM_DEBUG(dbgs() << "tryWithExtensions: baseName = " << LibStem << "\n";);
+  SmallVector<SmallString<256>, 8> Candidates;
+
+  // Add extensions by platform
+#if defined(__APPLE__)
+  Candidates.emplace_back(LibStem);
+  Candidates.back() += ".dylib";
+#elif defined(_WIN32)
+  Candidates.emplace_back(LibStem);
+  Candidates.back() += ".dll";
+#else
+  Candidates.emplace_back(LibStem);
+  Candidates.back() += ".so";
+#endif
+
+  // Optionally try "lib" prefix if not already there
+  StringRef FileName = sys::path::filename(LibStem);
+  StringRef Base = sys::path::parent_path(LibStem);
+  if (!FileName.starts_with("lib")) {
+    SmallString<256> WithPrefix(Base);
+    if (!WithPrefix.empty())
+      sys::path::append(WithPrefix, ""); // ensure separator if needed
+    WithPrefix += "lib";
+    WithPrefix += FileName;
+
+#if defined(__APPLE__)
+    WithPrefix += ".dylib";
+#elif defined(_WIN32)
+    WithPrefix += ".dll";
+#else
+    WithPrefix += ".so";
+#endif
+
+    Candidates.push_back(std::move(WithPrefix));
+  }
+
+  LLVM_DEBUG({
+    dbgs() << "  Candidates to try:\n";
+    for (const auto &C : Candidates)
+      dbgs() << "    " << C << "\n";
+  });
+
+  // Try all variants using tryAllPaths
+  for (const auto &Name : Candidates) {
+
+    LLVM_DEBUG(dbgs() << "  Trying candidate: " << Name << "\n";);
+
+    for (const auto &R : Resolvers) {
+      if (auto Res = R.resolve(Name, Substitutor, Validator))
+        return Res;
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "  -> No candidate Resolved.\n";);
+
+  return std::nullopt;
+}
+
+std::optional<std::string>
+DylibResolverImpl::resolve(StringRef LibStem, bool VariateLibStem) const {
+  LLVM_DEBUG(dbgs() << "Resolving library stem: " << LibStem << "\n";);
+
+  // If it is an absolute path, don't try iterate over the paths.
+  if (sys::path::is_absolute(LibStem)) {
+    LLVM_DEBUG(dbgs() << "  -> Absolute path detected.\n";);
+    return Validator.validate(LibStem);
+  }
+
+  if (!LibStem.starts_with_insensitive("@rpath")) {
+    if (auto norm = Validator.validate(Substitutor.substitute(LibStem))) {
+      LLVM_DEBUG(dbgs() << "  -> Resolved after substitution: " << *norm
+                        << "\n";);
+
+      return norm;
+    }
+  }
+
+  for (const auto &R : Resolvers) {
+    LLVM_DEBUG(dbgs() << "  -> Resolving via search path ... \n";);
+    if (auto Result = R.resolve(LibStem, Substitutor, Validator)) {
+      LLVM_DEBUG(dbgs() << "  -> Resolved via search path: " << *Result
+                        << "\n";);
+
+      return Result;
+    }
+  }
+
+  // Expand libStem with paths, extensions, etc.
+  // std::string foundName;
+  if (VariateLibStem) {
+    LLVM_DEBUG(dbgs() << "  -> Trying with extensions...\n";);
+
+    if (auto Norm = tryWithExtensions(LibStem)) {
+      LLVM_DEBUG(dbgs() << "  -> Resolved via tryWithExtensions: " << *Norm
+                        << "\n";);
+
+      return Norm;
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "  -> Could not resolve: " << LibStem << "\n";);
+
+  return std::nullopt;
+}
+
+#ifndef _WIN32
+mode_t PathResolver::lstatCached(StringRef Path) {
+  // If already cached - retun cached result
+  if (auto Cache = LibPathCache->read_lstat(Path))
+    return *Cache;
+
+  // Not cached: perform lstat and store
+  struct stat buf{};
+  mode_t st_mode = (lstat(Path.str().c_str(), &buf) == -1) ? 0 : buf.st_mode;
+
+  LibPathCache->insert_lstat(Path, st_mode);
+
+  return st_mode;
+}
+
+std::optional<std::string> PathResolver::readlinkCached(StringRef Path) {
+  // If already cached - retun cached result
+  if (auto Cache = LibPathCache->read_link(Path))
+    return Cache;
+
+  // If result not in cache - call system function and cache result
+  char buf[PATH_MAX];
+  ssize_t len;
+  if ((len = readlink(Path.str().c_str(), buf, sizeof(buf))) != -1) {
+    buf[len] = '\0';
+    std::string s(buf);
+    LibPathCache->insert_link(Path, s);
+    return s;
+  }
+  return std::nullopt;
+}
+
+void createComponent(StringRef Path, StringRef BasePath, bool BaseIsResolved,
+                     SmallVector<StringRef, 16> &Component) {
+  StringRef Separator = sys::path::get_separator();
+  if (!BaseIsResolved) {
+    if (Path[0] == '~' &&
+        (Path.size() == 1 || sys::path::is_separator(Path[1]))) {
+      static SmallString<128> HomeP;
+      if (HomeP.str().empty())
+        sys::path::home_directory(HomeP);
+      StringRef(HomeP).split(Component, Separator, /*MaxSplit*/ -1,
+                             /*KeepEmpty*/ false);
+    } else if (BasePath.empty()) {
+      static SmallString<256> CurrentPath;
+      if (CurrentPath.str().empty())
+        sys::fs::current_path(CurrentPath);
+      StringRef(CurrentPath)
+          .split(Component, Separator, /*MaxSplit*/ -1, /*KeepEmpty*/ false);
+    } else {
+      BasePath.split(Component, Separator, /*MaxSplit*/ -1,
+                     /*KeepEmpty*/ false);
+    }
+  }
+
+  Path.split(Component, Separator, /*MaxSplit*/ -1, /*KeepEmpty*/ false);
+}
+
+void normalizePathSegments(SmallVector<StringRef, 16> &PathParts) {
+  SmallVector<StringRef, 16> NormalizedPath;
+  for (auto &Part : PathParts) {
+    if (Part == ".") {
+      continue;
+    } else if (Part == "..") {
+      if (!NormalizedPath.empty() && NormalizedPath.back() != "..") {
+        NormalizedPath.pop_back();
+      } else {
+        NormalizedPath.push_back("..");
+      }
+    } else {
+      NormalizedPath.push_back(Part);
+    }
+  }
+  PathParts.swap(NormalizedPath);
+}
+#endif
+
+std::optional<std::string> PathResolver::realpathCached(StringRef Path,
+                                                        std::error_code &EC,
+                                                        StringRef Base,
+                                                        bool BaseIsResolved,
+                                                        long SymLoopLevel) {
+  EC.clear();
+
+  if (Path.empty()) {
+    EC = std::make_error_code(std::errc::no_such_file_or_directory);
+    LLVM_DEBUG(dbgs() << "PathResolver::realpathCached: Empty path\n";);
+
+    return std::nullopt;
+  }
+
+  if (SymLoopLevel <= 0) {
+    EC = std::make_error_code(std::errc::too_many_symbolic_link_levels);
+    LLVM_DEBUG(
+        dbgs() << "PathResolver::realpathCached: Too many Symlink levels: "
+               << Path << "\n";);
+
+    return std::nullopt;
+  }
+
+  // If already cached - retun cached result
+  bool isRelative = sys::path::is_relative(Path);
+  if (!isRelative) {
+    if (auto Cached = LibPathCache->read_realpath(Path)) {
+      EC = Cached->ErrnoCode;
+      if (EC) {
+        LLVM_DEBUG(dbgs() << "PathResolver::realpathCached: Cached (error) for "
+                          << Path << "\n";);
+      } else {
+        LLVM_DEBUG(
+            dbgs() << "PathResolver::realpathCached: Cached (success) for "
+                   << Path << " => " << Cached->canonicalPath << "\n";);
+      }
+      return Cached->canonicalPath.empty()
+                 ? std::nullopt
+                 : std::make_optional(Cached->canonicalPath);
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "PathResolver::realpathCached: Resolving path: " << Path
+                    << "\n";);
+
+  // If result not in cache - call system function and cache result
+
+  StringRef Separator(sys::path::get_separator());
+  SmallString<256> Resolved(Separator);
+#ifndef _WIN32
+  SmallVector<StringRef, 16> Components;
+
+  if (isRelative) {
+    if (BaseIsResolved) {
+      Resolved.assign(Base);
+      LLVM_DEBUG(dbgs() << "  Using Resolved base: " << Base << "\n";);
+    }
+    createComponent(Path, Base, BaseIsResolved, Components);
+  } else {
+    Path.split(Components, Separator, /*MaxSplit*/ -1, /*KeepEmpty*/ false);
+  }
+
+  normalizePathSegments(Components);
+  LLVM_DEBUG({
+    for (auto &C : Components)
+      dbgs() << " " << C << " ";
+
+    dbgs() << "\n";
+  });
+
+  // Handle path list items
+  for (const auto &Component : Components) {
+    if (Component == ".")
+      continue;
+    if (Component == "..") {
+      // collapse "a/b/../c" to "a/c"
+      size_t S = Resolved.rfind(Separator);
+      if (S != llvm::StringRef::npos)
+        Resolved.resize(S);
+      if (Resolved.empty())
+        Resolved = Separator;
+      continue;
+    }
+
+    size_t oldSize = Resolved.size();
+    sys::path::append(Resolved, Component);
+    const char *ResolvedPath = Resolved.c_str();
+    LLVM_DEBUG(dbgs() << "  Processing Component: " << Component << " => "
+                      << ResolvedPath << "\n";);
+    mode_t st_mode = lstatCached(ResolvedPath);
+
+    if (S_ISLNK(st_mode)) {
+      LLVM_DEBUG(dbgs() << "    Found symlink: " << ResolvedPath << "\n";);
+
+      auto SymlinkOpt = readlinkCached(ResolvedPath);
+      if (!SymlinkOpt) {
+        EC = std::make_error_code(std::errc::no_such_file_or_directory);
+        LibPathCache->insert_realpath(Path, LibraryPathCache::PathInfo{"", EC});
+        LLVM_DEBUG(dbgs() << "    Failed to read symlink: " << ResolvedPath
+                          << "\n";);
+
+        return std::nullopt;
+      }
+
+      StringRef Symlink = *SymlinkOpt;
+      LLVM_DEBUG(dbgs() << "    Symlink points to: " << Symlink << "\n";);
+
+      std::string resolvedBase = "";
+      if (sys::path::is_relative(Symlink)) {
+        Resolved.resize(oldSize);
+        resolvedBase = Resolved.str().str();
+      }
+
+      auto RealSymlink =
+          realpathCached(Symlink, EC, resolvedBase,
+                         /*BaseIsResolved=*/true, SymLoopLevel - 1);
+      if (!RealSymlink) {
+        LibPathCache->insert_realpath(Path, LibraryPathCache::PathInfo{"", EC});
+        LLVM_DEBUG(dbgs() << "    Failed to resolve symlink target: " << Symlink
+                          << "\n";);
+
+        return std::nullopt;
+      }
+
+      Resolved.assign(*RealSymlink);
+      LLVM_DEBUG(dbgs() << "    Symlink Resolved to: " << Resolved << "\n";);
+
+    } else if (st_mode == 0) {
+      EC = std::make_error_code(std::errc::no_such_file_or_directory);
+      LibPathCache->insert_realpath(Path, LibraryPathCache::PathInfo{"", EC});
+      LLVM_DEBUG(dbgs() << "    Component does not exist: " << ResolvedPath
+                        << "\n";);
+
+      return std::nullopt;
+    }
+  }
+#else
+  sys::fs::real_path(Path, Resolved); // Windows fallback
+#endif
+
+  std::string Canonical = Resolved.str().str();
+  {
+    LibPathCache->insert_realpath(Path, LibraryPathCache::PathInfo{
+                                            Canonical,
+                                            std::error_code() // success
+                                        });
+  }
+  LLVM_DEBUG(dbgs() << "PathResolver::realpathCached: Final Resolved: " << Path
+                    << " => " << Canonical << "\n";);
+  return Canonical;
+}
+
+void LibraryScanHelper::addBasePath(const std::string &Path, PathType K) {
+  std::error_code EC;
+  std::string Canon = resolveCanonical(Path, EC);
+  if (EC) {
+    LLVM_DEBUG(
+        dbgs()
+            << "LibraryScanHelper::addBasePath: Failed to canonicalize path: "
+            << Path << "\n";);
+    return;
+  }
+  std::unique_lock<std::shared_mutex> Lock(Mtx);
+  if (LibSearchPaths.count(Canon)) {
+    LLVM_DEBUG(dbgs() << "LibraryScanHelper::addBasePath: Already added: "
+                      << Canon << "\n";);
+    return;
+  }
+  K = K == PathType::Unknown ? classifyKind(Canon) : K;
+  auto SP = std::make_shared<LibrarySearchPath>(Canon, K);
+  LibSearchPaths[Canon] = SP;
+
+  if (K == PathType::User) {
+    LLVM_DEBUG(dbgs() << "LibraryScanHelper::addBasePath: Added User path: "
+                      << Canon << "\n";);
+    UnscannedUsr.push_back(StringRef(SP->BasePath));
+  } else {
+    LLVM_DEBUG(dbgs() << "LibraryScanHelper::addBasePath: Added System path: "
+                      << Canon << "\n";);
+    UnscannedSys.push_back(StringRef(SP->BasePath));
+  }
+}
+
+std::vector<std::shared_ptr<LibrarySearchPath>>
+LibraryScanHelper::getNextBatch(PathType K, size_t BatchSize) {
+  std::vector<std::shared_ptr<LibrarySearchPath>> Result;
+  auto &Queue = (K == PathType::User) ? UnscannedUsr : UnscannedSys;
+
+  std::unique_lock<std::shared_mutex> Lock(Mtx);
+
+  while (!Queue.empty() && (BatchSize == 0 || Result.size() < BatchSize)) {
+    StringRef Base = Queue.front();
+    auto It = LibSearchPaths.find(Base);
+    if (It != LibSearchPaths.end()) {
+      auto &SP = It->second;
+      ScanState Expected = ScanState::NotScanned;
+      if (SP->State.compare_exchange_strong(Expected, ScanState::Scanning)) {
+        Result.push_back(SP);
+      }
+    }
+    Queue.pop_front();
+  }
+
+  return Result;
+}
+
+bool LibraryScanHelper::isTrackedBasePath(StringRef Path) const {
+  std::error_code EC;
+  std::string Canon = resolveCanonical(Path, EC);
+  if (EC)
+    return false;
+
+  std::shared_lock<std::shared_mutex> Lock(Mtx);
+  return LibSearchPaths.count(Canon) > 0;
+}
+
+bool LibraryScanHelper::leftToScan(PathType K) const {
+  std::shared_lock<std::shared_mutex> Lock(Mtx);
+  for (const auto &KV : LibSearchPaths) {
+    const auto &SP = KV.second;
+    if (SP->Kind == K && SP->State == ScanState::NotScanned)
+      return true;
+  }
+  return false;
+}
+
+void LibraryScanHelper::resetToScan() {
+  std::shared_lock<std::shared_mutex> Lock(Mtx);
+
+  for (auto &[_, SP] : LibSearchPaths) {
+    ScanState Expected = ScanState::Scanned;
+
+    if (!SP->State.compare_exchange_strong(Expected, ScanState::NotScanned))
+      continue;
+
+    auto &TargetList =
+        (SP->Kind == PathType::User) ? UnscannedUsr : UnscannedSys;
+    TargetList.emplace_back(SP->BasePath);
+  }
+}
+
+std::vector<std::shared_ptr<LibrarySearchPath>>
+LibraryScanHelper::getAllUnits() const {
+  std::shared_lock<std::shared_mutex> Lock(Mtx);
+  std::vector<std::shared_ptr<LibrarySearchPath>> Result;
+  Result.reserve(LibSearchPaths.size());
+  for (const auto &[_, SP] : LibSearchPaths) {
+    Result.push_back(SP);
+  }
+  return Result;
+}
+
+std::string LibraryScanHelper::resolveCanonical(StringRef Path,
+                                                std::error_code &EC) const {
+  auto Canon = LibPathResolver->resolve(Path, EC);
+  return EC ? Path.str() : *Canon;
+}
+
+PathType LibraryScanHelper::classifyKind(StringRef Path) const {
+  // Detect home directory
+  const char *Home = getenv("HOME");
+  if (Home && Path.find(Home) == 0)
+    return PathType::User;
+
+  static const std::array<std::string, 5> UserPrefixes = {
+      "/usr/local",    // often used by users for manual installs
+      "/opt/homebrew", // common on macOS
+      "/opt/local",    // MacPorts
+      "/home",         // Linux home dirs
+      "/Users",        // macOS user dirs
+  };
+
+  for (const auto &Prefix : UserPrefixes) {
+    if (Path.find(Prefix) == 0)
+      return PathType::User;
+  }
+
+  return PathType::System;
+}
+
+Expected<LibraryDepsInfo> parseMachODeps(const object::MachOObjectFile &Obj) {
+  LibraryDepsInfo Libdeps;
+  LLVM_DEBUG(dbgs() << "Parsing Mach-O dependencies...\n";);
+  for (const auto &Command : Obj.load_commands()) {
+    switch (Command.C.cmd) {
+    case MachO::LC_LOAD_DYLIB: {
+      MachO::dylib_command dylibCmd = Obj.getDylibIDLoadCommand(Command);
+      const char *name = Command.Ptr + dylibCmd.dylib.name;
+      Libdeps.addDep(name);
+      LLVM_DEBUG(dbgs() << "  Found LC_LOAD_DYLIB: " << name << "\n";);
+    } break;
+    case MachO::LC_LOAD_WEAK_DYLIB:
+    case MachO::LC_REEXPORT_DYLIB:
+    case MachO::LC_LOAD_UPWARD_DYLIB:
+    case MachO::LC_LAZY_LOAD_DYLIB:
+      break;
+    case MachO::LC_RPATH: {
+      // Extract RPATH
+      MachO::rpath_command rpathCmd = Obj.getRpathCommand(Command);
+      const char *rpath = Command.Ptr + rpathCmd.path;
+      LLVM_DEBUG(dbgs() << "  Found LC_RPATH: " << rpath << "\n";);
+
+      SmallVector<StringRef, 4> RawPaths;
+      SplitString(StringRef(rpath), RawPaths,
+                  sys::EnvPathSeparator == ':' ? ":" : ";");
+
+      for (const auto &raw : RawPaths) {
+        Libdeps.addRPath(raw.str()); // Convert to std::string
+        LLVM_DEBUG(dbgs() << "    Parsed RPATH entry: " << raw << "\n";);
+      }
+      break;
+    }
+    }
+  }
+
+  return Expected<LibraryDepsInfo>(std::move(Libdeps));
+}
+
+template <class ELFT>
+static Expected<StringRef> getDynamicStrTab(const object::ELFFile<ELFT> &Elf) {
+  auto DynamicEntriesOrError = Elf.dynamicEntries();
+  if (!DynamicEntriesOrError)
+    return DynamicEntriesOrError.takeError();
+
+  for (const typename ELFT::Dyn &Dyn : *DynamicEntriesOrError) {
+    if (Dyn.d_tag == ELF::DT_STRTAB) {
+      auto MappedAddrOrError = Elf.toMappedAddr(Dyn.getPtr());
+      if (!MappedAddrOrError)
+        return MappedAddrOrError.takeError();
+      return StringRef(reinterpret_cast<const char *>(*MappedAddrOrError));
+    }
+  }
+
+  // If the dynamic segment is not present, we fall back on the sections.
+  auto SectionsOrError = Elf.sections();
+  if (!SectionsOrError)
+    return SectionsOrError.takeError();
+
+  for (const typename ELFT::Shdr &Sec : *SectionsOrError) {
+    if (Sec.sh_type == ELF::SHT_DYNSYM)
+      return Elf.getStringTableForSymtab(Sec);
+  }
+
+  return make_error<StringError>("dynamic string table not found",
+                                 inconvertibleErrorCode());
+}
+
+template <typename ELFT>
+Expected<LibraryDepsInfo> parseELF(const object::ELFFile<ELFT> &Elf) {
+  LibraryDepsInfo Deps;
+  Expected<StringRef> StrTabOrErr = getDynamicStrTab(Elf);
+  if (!StrTabOrErr)
+    return StrTabOrErr.takeError();
+
+  const char *Data = StrTabOrErr->data();
+
+  auto DynamicEntriesOrError = Elf.dynamicEntries();
+  if (!DynamicEntriesOrError) {
+    return DynamicEntriesOrError.takeError();
+  }
+
+  for (const typename ELFT::Dyn &Dyn : *DynamicEntriesOrError) {
+    switch (Dyn.d_tag) {
+    case ELF::DT_NEEDED:
+      Deps.addDep(Data + Dyn.d_un.d_val);
+      break;
+    case ELF::DT_RPATH: {
+      SmallVector<StringRef, 4> RawPaths;
+      SplitString(Data + Dyn.d_un.d_val, RawPaths,
+                  sys::EnvPathSeparator == ':' ? ":" : ";");
+      for (const auto &raw : RawPaths)
+        Deps.addRPath(raw.str());
+      break;
+    }
+    case ELF::DT_RUNPATH: {
+      SmallVector<StringRef, 4> RawPaths;
+      SplitString(Data + Dyn.d_un.d_val, RawPaths,
+                  sys::EnvPathSeparator == ':' ? ":" : ";");
+      for (const auto &raw : RawPaths)
+        Deps.addRunPath(raw.str());
+      break;
+    }
+    case ELF::DT_FLAGS_1:
+      // Check if this is not a pie executable.
+      if (Dyn.d_un.d_val & ELF::DF_1_PIE)
+        Deps.isPIE = true;
+      break;
+      // (Dyn.d_tag == ELF::DT_NULL) continue;
+      // (Dyn.d_tag == ELF::DT_AUXILIARY || Dyn.d_tag == ELF::DT_FILTER)
+    default:
+      break;
+    }
+  }
+
+  return Expected<LibraryDepsInfo>(std::move(Deps));
+}
+
+Expected<LibraryDepsInfo> parseELFDeps(const object::ELFObjectFileBase &Obj) {
+  using namespace object;
+  LLVM_DEBUG(dbgs() << "parseELFDeps: Detected ELF object\n";);
+  if (const auto *ELF = dyn_cast<ELF32LEObjectFile>(&Obj))
+    return parseELF(ELF->getELFFile());
+  else if (const auto *ELF = dyn_cast<ELF32BEObjectFile>(&Obj))
+    return parseELF(ELF->getELFFile());
+  else if (const auto *ELF = dyn_cast<ELF64LEObjectFile>(&Obj))
+    return parseELF(ELF->getELFFile());
+  else if (const auto *ELF = dyn_cast<ELF64BEObjectFile>(&Obj))
+    return parseELF(ELF->getELFFile());
+
+  LLVM_DEBUG(dbgs() << "parseELFDeps: Unknown ELF format\n";);
+  return createStringError(std::errc::not_supported, "Unknown ELF format");
+}
+
+Expected<LibraryDepsInfo> LibraryScanner::extractDeps(StringRef FilePath) {
+  LLVM_DEBUG(dbgs() << "extractDeps: Attempting to open file " << FilePath
+                    << "\n";);
+
+  ObjectFileLoader ObjLoader(FilePath);
+  auto ObjOrErr = ObjLoader.getObjectFile();
+  if (!ObjOrErr) {
+    LLVM_DEBUG(dbgs() << "extractDeps: Failed to open " << FilePath << "\n";);
+    return ObjOrErr.takeError();
+  }
+
+  object::ObjectFile *Obj = &ObjOrErr.get();
+
+  if (auto *elfObj = dyn_cast<object::ELFObjectFileBase>(Obj)) {
+    LLVM_DEBUG(dbgs() << "extractDeps: File " << FilePath
+                      << " is an ELF object\n";);
+
+    return parseELFDeps(*elfObj);
+  }
+
+  if (auto *macho = dyn_cast<object::MachOObjectFile>(Obj)) {
+    LLVM_DEBUG(dbgs() << "extractDeps: File " << FilePath
+                      << " is a Mach-O object\n";);
+    return parseMachODeps(*macho);
+  }
+
+  if (Obj->isCOFF()) {
+    // TODO: COFF support
+    return LibraryDepsInfo();
+  }
+
+  LLVM_DEBUG(dbgs() << "extractDeps: Unsupported binary format for file "
+                    << FilePath << "\n";);
+  return createStringError(inconvertibleErrorCode(),
+                           "Unsupported binary format: %s",
+                           FilePath.str().c_str());
+}
+
+std::optional<std::string> LibraryScanner::shouldScan(StringRef FilePath) {
+  std::error_code EC;
+
+  LLVM_DEBUG(dbgs() << "[shouldScan] Checking: " << FilePath << "\n";);
+
+  // [1] Check file existence early
+  if (!sys::fs::exists(FilePath)) {
+    LLVM_DEBUG(dbgs() << "  -> Skipped: file does not exist.\n";);
+
+    return std::nullopt;
+  }
+
+  // [2] Resolve to canonical path
+  auto CanonicalPathOpt = ScanHelper.resolve(FilePath, EC);
+  if (EC || !CanonicalPathOpt) {
+    LLVM_DEBUG(dbgs() << "  -> Skipped: failed to resolve path (EC="
+                      << EC.message() << ").\n";);
+
+    return std::nullopt;
+  }
+
+  const std::string &CanonicalPath = *CanonicalPathOpt;
+  LLVM_DEBUG(dbgs() << "  -> Canonical path: " << CanonicalPath << "\n");
+
+  // [3] Check if it's a directory — skip directories
+  if (sys::fs::is_directory(CanonicalPath)) {
+    LLVM_DEBUG(dbgs() << "  -> Skipped: path is a directory.\n";);
+
+    return std::nullopt;
+  }
+
+  // [4] Skip if it's not a shared library.
+  if (!DylibPathValidator::isSharedLibrary(CanonicalPath)) {
+    LLVM_DEBUG(dbgs() << "  -> Skipped: not a shared library.\n";);
+    return std::nullopt;
+  }
+
+  // [5] Skip if we've already seen this path (via cache)
+  if (ScanHelper.hasSeenOrMark(CanonicalPath)) {
+    LLVM_DEBUG(dbgs() << "  -> Skipped: already seen.\n";);
+
+    return std::nullopt;
+  }
+
+  // [6] Already tracked in LibraryManager?
+  if (LibMgr.hasLibrary(CanonicalPath)) {
+    LLVM_DEBUG(dbgs() << "  -> Skipped: already tracked by LibraryManager.\n";);
+
+    return std::nullopt;
+  }
+
+  // [7] Run user-defined hook (default: always true)
+  if (!ShouldScanCall(CanonicalPath)) {
+    LLVM_DEBUG(dbgs() << "  -> Skipped: user-defined hook rejected.\n";);
+
+    return std::nullopt;
+  }
+
+  LLVM_DEBUG(dbgs() << "  -> Accepted: ready to scan " << CanonicalPath
+                    << "\n";);
+  return CanonicalPath;
+}
+
+void LibraryScanner::handleLibrary(StringRef FilePath, PathType K, int level) {
+  LLVM_DEBUG(dbgs() << "LibraryScanner::handleLibrary: Scanning: " << FilePath
+                    << ", level=" << level << "\n";);
+  auto CanonPathOpt = shouldScan(FilePath);
+  if (!CanonPathOpt) {
+    LLVM_DEBUG(dbgs() << "  Skipped (shouldScan returned false): " << FilePath
+                      << "\n";);
+
+    return;
+  }
+  const std::string CanonicalPath = *CanonPathOpt;
+
+  auto DepsOrErr = extractDeps(CanonicalPath);
+  if (!DepsOrErr) {
+    LLVM_DEBUG(dbgs() << "  Failed to extract deps for: " << CanonicalPath
+                      << "\n";);
+    handleError(DepsOrErr.takeError());
+    return;
+  }
+
+  LibraryDepsInfo &Deps = *DepsOrErr;
+
+  LLVM_DEBUG({
+    dbgs() << "    Found deps : \n";
+    for (const auto &dep : Deps.deps)
+      dbgs() << "        : " << dep << "\n";
+    dbgs() << "    Found @rpath : " << Deps.rpath.size() << "\n";
+    for (const auto &r : Deps.rpath)
+      dbgs() << "     : " << r << "\n";
+    dbgs() << "    Found @runpath : \n";
+    for (const auto &r : Deps.runPath)
+      dbgs() << "     : " << r << "\n";
+  });
+
+  if (Deps.isPIE && level == 0) {
+    LLVM_DEBUG(dbgs() << "  Skipped PIE executable at top level: "
+                      << CanonicalPath << "\n";);
+
+    return;
+  }
+
+  bool Added = LibMgr.addLibrary(CanonicalPath, K);
+  if (!Added) {
+    LLVM_DEBUG(dbgs() << "  Already added: " << CanonicalPath << "\n";);
+    return;
+  }
+
+  // Heuristic 1: No RPATH/RUNPATH, skip deps
+  if (Deps.rpath.empty() && Deps.runPath.empty()) {
+    LLVM_DEBUG(
+        dbgs() << "LibraryScanner::handleLibrary: Skipping deps (Heuristic1): "
+               << CanonicalPath << "\n";);
+    return;
+  }
+
+  // Heuristic 2: All RPATH and RUNPATH already tracked
+  auto allTracked = [&](const auto &Paths) {
+    LLVM_DEBUG(dbgs() << "   Checking : " << Paths.size() << "\n";);
+    return std::all_of(Paths.begin(), Paths.end(), [&](StringRef P) {
+      LLVM_DEBUG(dbgs() << "      Checking isTrackedBasePath : " << P << "\n";);
+      return ScanHelper.isTrackedBasePath(
+          DylibResolver::resolvelinkerFlag(P, CanonicalPath));
+    });
+  };
+
+  if (allTracked(Deps.rpath) && allTracked(Deps.runPath)) {
+    LLVM_DEBUG(
+        dbgs() << "LibraryScanner::handleLibrary: Skipping deps (Heuristic2): "
+               << CanonicalPath << "\n";);
+    return;
+  }
+
+  DylibPathValidator Validator(ScanHelper.getPathResolver());
+  DylibResolver Resolver(Validator);
+  Resolver.configure(CanonicalPath,
+                     {{Deps.rpath, SearchPathType::RPath},
+                      {ScanHelper.getSearchPaths(), SearchPathType::UsrOrSys},
+                      {Deps.runPath, SearchPathType::RunPath}});
+  for (StringRef Dep : Deps.deps) {
+    LLVM_DEBUG(dbgs() << "  Resolving dep: " << Dep << "\n";);
+    auto DepFullOpt = Resolver.resolve(Dep);
+    if (!DepFullOpt) {
+      LLVM_DEBUG(dbgs() << "    Failed to resolve dep: " << Dep << "\n";);
+
+      continue;
+    }
+    LLVM_DEBUG(dbgs() << "    Resolved dep to: " << *DepFullOpt << "\n";);
+
+    handleLibrary(*DepFullOpt, K, level + 1);
+  }
+}
+
+void LibraryScanner::scanBaseDir(std::shared_ptr<LibrarySearchPath> SP) {
+  if (!sys::fs::is_directory(SP->BasePath) || SP->BasePath.empty()) {
+    LLVM_DEBUG(
+        dbgs() << "LibraryScanner::scanBaseDir: Invalid or empty basePath: "
+               << SP->BasePath << "\n";);
+    return;
+  }
+
+  LLVM_DEBUG(dbgs() << "LibraryScanner::scanBaseDir: Scanning directory: "
+                    << SP->BasePath << "\n";);
+  std::error_code EC;
+
+  SP->State.store(ScanState::Scanning);
+
+  for (sys::fs::directory_iterator It(SP->BasePath, EC), end; It != end && !EC;
+       It.increment(EC)) {
+    auto Entry = *It;
+    if (!Entry.status())
+      continue;
+
+    auto Status = *Entry.status();
+    if (sys::fs::is_regular_file(Status) || sys::fs::is_symlink_file(Status)) {
+      LLVM_DEBUG(dbgs() << "  Found file: " << Entry.path() << "\n";);
+      // async support ?
+      handleLibrary(Entry.path(), SP->Kind);
+    }
+  }
+
+  SP->State.store(ScanState::Scanned);
+}
+
+void LibraryScanner::scanNext(PathType K, size_t BatchSize) {
+  LLVM_DEBUG(dbgs() << "LibraryScanner::scanNext: Scanning next batch of size "
+                    << BatchSize << " for kind "
+                    << (K == PathType::User ? "User" : "System") << "\n";);
+
+  auto SearchPaths = ScanHelper.getNextBatch(K, BatchSize);
+  for (auto &SP : SearchPaths) {
+    LLVM_DEBUG(dbgs() << "  Scanning unit with basePath: " << SP->BasePath
+                      << "\n";);
+
+    scanBaseDir(SP);
+  }
+}
+
+} // end namespace llvm::orc
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 9d0fa11..4bc2a18 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -471,16 +471,14 @@ static void thinLTOInternalizeAndPromoteGUID(
     ValueInfo VI, function_ref<bool(StringRef, ValueInfo)> isExported,
     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
         isPrevailing) {
-  auto ExternallyVisibleCopies =
-      llvm::count_if(VI.getSummaryList(),
-                     [](const std::unique_ptr<GlobalValueSummary> &Summary) {
-                       return !GlobalValue::isLocalLinkage(Summary->linkage());
-                     });
-
   // Before performing index-based internalization and promotion for this GUID,
   // the local flag should be consistent with the summary list linkage types.
   VI.verifyLocal();
 
+  const bool SingleExternallyVisibleCopy =
+      VI.getSummaryList().size() == 1 &&
+      !GlobalValue::isLocalLinkage(VI.getSummaryList().front()->linkage());
+
   for (auto &S : VI.getSummaryList()) {
     // First see if we need to promote an internal value because it is not
     // exported.
@@ -543,7 +541,9 @@ static void thinLTOInternalizeAndPromoteGUID(
         GlobalValue::isExternalWeakLinkage(S->linkage()))
       continue;
 
-    if (isPrevailing(VI.getGUID(), S.get()) && ExternallyVisibleCopies == 1)
+    // We may have a single summary copy that is externally visible but not
+    // prevailing if the prevailing copy is in a native object.
+    if (SingleExternallyVisibleCopy && isPrevailing(VI.getGUID(), S.get()))
       S->setLinkage(GlobalValue::InternalLinkage);
   }
 }
@@ -1086,15 +1086,15 @@ LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
           GlobalValue::getGlobalIdentifier(Sym.getIRName(),
                                            GlobalValue::ExternalLinkage, ""));
       if (R.Prevailing)
-        ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier();
+        ThinLTO.setPrevailingModuleForGUID(GUID, BM.getModuleIdentifier());
     }
   }
 
   if (Error Err =
           BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(),
                          [&](GlobalValue::GUID GUID) {
-                           return ThinLTO.PrevailingModuleForGUID[GUID] ==
-                                  BM.getModuleIdentifier();
+                           return ThinLTO.isPrevailingModuleForGUID(
+                               GUID, BM.getModuleIdentifier());
                          }))
     return Err;
   LLVM_DEBUG(dbgs() << "Module " << BM.getModuleIdentifier() << "\n");
@@ -1108,8 +1108,8 @@ LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
           GlobalValue::getGlobalIdentifier(Sym.getIRName(),
                                            GlobalValue::ExternalLinkage, ""));
       if (R.Prevailing) {
-        assert(ThinLTO.PrevailingModuleForGUID[GUID] ==
-               BM.getModuleIdentifier());
+        assert(
+            ThinLTO.isPrevailingModuleForGUID(GUID, BM.getModuleIdentifier()));
 
         // For linker redefined symbols (via --wrap or --defsym) we want to
         // switch the linkage to `weak` to prevent IPOs from happening.
@@ -1988,7 +1988,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
                                LocalWPDTargetsMap);
 
   auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
-    return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
+    return ThinLTO.isPrevailingModuleForGUID(GUID, S->modulePath());
   };
   if (EnableMemProfContextDisambiguation) {
     MemProfContextDisambiguation ContextDisambiguation;
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index ecaeff7..b3ec65c 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -71,7 +71,6 @@ def AArch64PreLegalizerCombiner: GICombiner<
   "AArch64PreLegalizerCombinerImpl", [all_combines,
                                       icmp_redundant_trunc,
                                       fold_global_offset,
-                                      shuffle_to_extract,
                                       ext_addv_to_udot_addv,
                                       ext_uaddv_to_uaddlv,
                                       push_sub_through_zext,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a81de5c..018ef31 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19476,6 +19476,61 @@ static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
                      Op1 ? Op1 : Mul->getOperand(1));
 }
 
+// Multiplying an RDSVL value by a constant can sometimes be done cheaper by
+// folding a power-of-two factor of the constant into the RDSVL immediate and
+// compensating with an extra shift.
+//
+// We rewrite:
+//   (mul (srl (rdsvl 1), w), x)
+// to one of:
+//   (shl (rdsvl y),  z)   if z > 0
+//   (srl (rdsvl y), abs(z))   if z < 0
+// where integers y, z satisfy   x = y * 2^(w + z)   and   y ∈ [-32, 31].
+static SDValue performMulRdsvlCombine(SDNode *Mul, SelectionDAG &DAG) {
+  SDLoc DL(Mul);
+  EVT VT = Mul->getValueType(0);
+  SDValue MulOp0 = Mul->getOperand(0);
+  int ConstMultiplier =
+      cast<ConstantSDNode>(Mul->getOperand(1))->getSExtValue();
+  if ((MulOp0->getOpcode() != ISD::SRL) ||
+      (MulOp0->getOperand(0).getOpcode() != AArch64ISD::RDSVL))
+    return SDValue();
+
+  unsigned AbsConstValue = abs(ConstMultiplier);
+  unsigned OperandShift =
+      cast<ConstantSDNode>(MulOp0->getOperand(1))->getZExtValue();
+
+  // z ≤ ctz(|x|) - w  (largest extra shift we can take while keeping y
+  // integral)
+  int UpperBound = llvm::countr_zero(AbsConstValue) - OperandShift;
+
+  // To keep y in range, with B = 31 for x > 0 and B = 32 for x < 0, we need:
+  // 2^(w + z) ≥ ceil(x / B)  ⇒  z ≥ ceil_log2(ceil(x / B)) - w  (LowerBound).
+  unsigned B = ConstMultiplier < 0 ? 32 : 31;
+  unsigned CeilAxOverB = (AbsConstValue + (B - 1)) / B; // ceil(|x|/B)
+  int LowerBound = llvm::Log2_32_Ceil(CeilAxOverB) - OperandShift;
+
+  // No valid solution found.
+  if (LowerBound > UpperBound)
+    return SDValue();
+
+  // Any value of z in [LowerBound, UpperBound] is valid. Prefer no extra
+  // shift if possible.
+  int Shift = std::min(std::max(/*prefer*/ 0, LowerBound), UpperBound);
+
+  // y = x / 2^(w + z)
+  int32_t RdsvlMul = (AbsConstValue >> (OperandShift + Shift)) *
+                     (ConstMultiplier < 0 ? -1 : 1);
+  auto Rdsvl = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
+                           DAG.getSignedConstant(RdsvlMul, DL, MVT::i32));
+
+  if (Shift == 0)
+    return Rdsvl;
+  return DAG.getNode(Shift < 0 ? ISD::SRL : ISD::SHL, DL, VT, Rdsvl,
+                     DAG.getConstant(abs(Shift), DL, MVT::i32),
+                     SDNodeFlags::Exact);
+}
+
 // Combine v4i32 Mul(And(Srl(X, 15), 0x10001), 0xffff) -> v8i16 CMLTz
 // Same for other types with equivalent constants.
 static SDValue performMulVectorCmpZeroCombine(SDNode *N, SelectionDAG &DAG) {
@@ -19604,6 +19659,9 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
   if (!isa<ConstantSDNode>(N1))
     return SDValue();
 
+  if (SDValue Ext = performMulRdsvlCombine(N, DAG))
+    return Ext;
+
   ConstantSDNode *C = cast<ConstantSDNode>(N1);
   const APInt &ConstValue = C->getAPIntValue();
 
@@ -29430,15 +29488,6 @@ void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
   TargetLowering::insertSSPDeclarations(M);
 }
 
-Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
-  // MSVC CRT has a function to validate security cookie.
-  RTLIB::LibcallImpl SecurityCheckCookieLibcall =
-      getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
-  if (SecurityCheckCookieLibcall != RTLIB::Unsupported)
-    return M.getFunction(getLibcallImplName(SecurityCheckCookieLibcall));
-  return TargetLowering::getSSPStackGuardCheck(M);
-}
-
 Value *
 AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
   // Android provides a fixed TLS slot for the SafeStack pointer. See the
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 9495c9f..2cb8ed2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -366,7 +366,6 @@ public:
   Value *getIRStackGuard(IRBuilderBase &IRB) const override;
 
   void insertSSPDeclarations(Module &M) const override;
-  Function *getSSPStackGuardCheck(const Module &M) const override;
 
   /// If the target has a standard location for the unsafe stack pointer,
   /// returns the address of that location. Otherwise, returns nullptr.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index eab1627..58a53af 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -5298,7 +5298,7 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
 }
 
 multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm, 
-                                 SDPatternOperator OpN = null_frag> {
+                                 SDPatternOperator OpN> {
   // double-precision to 32-bit SIMD/FPR
   def SDr :  BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
              [(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index b74ca79..a352096 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5253,114 +5253,11 @@ let Predicates = [HasNEON, HasFPRCVT] in{
   defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>;
   defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>;
   defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>;
-  defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
-  defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
+  defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs", any_fp_to_sint>;
+  defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu", any_fp_to_uint>;
 }
 
 
-// AArch64's FCVT instructions saturate when out of range.
-multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
-            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
-  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
-            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
-  }
-  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
-            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
-  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
-            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
-  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
-            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
-  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
-            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
-            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
-  def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
-            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
-  }
-  def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
-            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
-  def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
-            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
-  def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
-            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
-  def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
-            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
-            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
-            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
-  }
-  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
-            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
-            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
-  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
-            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
-            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
-            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
-            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
-  }
-  def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
-            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
-            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
-  def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
-            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
-            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-}
-
-defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
-defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
-
-multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
-  def : Pat<(i32 (to_int (round f32:$Rn))),
-            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
-  def : Pat<(i64 (to_int (round f32:$Rn))),
-            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
-  def : Pat<(i32 (to_int (round f64:$Rn))),
-            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
-  def : Pat<(i64 (to_int (round f64:$Rn))),
-            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
-  // These instructions saturate like fp_to_[su]int_sat.
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
-            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
-  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
-            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
-  }
-  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
-            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
-  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
-            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
-  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
-            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
-  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
-            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-}
-
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
-
-
 
 let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (any_lround f16:$Rn)),
@@ -6567,8 +6464,8 @@ defm FCVTNU : SIMDFPTwoScalar<   1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtn
 defm FCVTPS : SIMDFPTwoScalar<   0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
 defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
 def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
+defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
+defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
 defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe">;
 defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx">;
 defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte">;
@@ -6588,6 +6485,7 @@ defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
 
 // Floating-point conversion patterns.
 multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
+  let Predicates = [HasFPRCVT] in {
   def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))),
             (!cast<Instruction>(INST # SDr) FPR64:$Rn)>;
   def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))),
@@ -6596,6 +6494,7 @@ multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
             (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
   def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
             (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+  }
   def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
             (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
   def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
@@ -6610,6 +6509,8 @@ defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_sint, "FCVTZS">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_uint, "FCVTZU">;
 
 multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
   let Predicates = [HasFullFP16] in {
@@ -6666,6 +6567,196 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
 defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
 defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
 
+// AArch64's FCVT instructions saturate when out of range.
+multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
+            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
+            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+  }
+  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
+            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+  }
+  def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+  // For global-isel we can use register classes to determine
+  // which FCVT instruction to use.
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat f16:$Rn, i32)))),
+            (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat f16:$Rn, i64)))),
+            (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat f32:$Rn, i64)))),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat f64:$Rn, i32)))),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat f32:$Rn, i32)))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat f64:$Rn, i64)))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
+            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
+            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+  }
+  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
+            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
+            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
+            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
+            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+  }
+  def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+  def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+}
+
+defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
+defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
+
+multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode to_int_sat_gi, SDNode round, string INST> {
+  def : Pat<(i32 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+  // For global-isel we can use register classes to determine
+  // which FCVT instruction to use.
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(i64 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(i32 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(i64 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(f64 (bitconvert (i64 (to_int (round f32:$Rn))))),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (to_int (round f64:$Rn))))),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(f32 (bitconvert (i32 (to_int (round f32:$Rn))))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int (round f64:$Rn))))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+  // These instructions saturate like fp_to_[su]int_sat.
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
+            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
+            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+  }
+  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
+            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+  // For global-isel we can use register classes to determine
+  // which FCVT instruction to use.
+  let Predicates = [HasFPRCVT] in {
+    def : Pat<(i32 (to_int_sat_gi (round f16:$Rn))),
+              (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+    def : Pat<(i64 (to_int_sat_gi (round f16:$Rn))),
+              (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+    def : Pat<(i64 (to_int_sat_gi (round f32:$Rn))),
+              (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+    def : Pat<(i32 (to_int_sat_gi (round f64:$Rn))),
+              (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(i32 (to_int_sat_gi (round f32:$Rn))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi (round f64:$Rn))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+            
+  let Predicates = [HasFPRCVT] in {
+    def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f16:$Rn), i32)))),
+              (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+    def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f16:$Rn), i64)))),
+              (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+    def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f32:$Rn), i64)))),
+              (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+    def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f64:$Rn), i32)))),
+              (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f32:$Rn), i32)))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f64:$Rn), i64)))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+}
+
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fceil,  "FCVTPS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fceil,  "FCVTPU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ffloor, "FCVTMS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ffloor, "FCVTMU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ftrunc, "FCVTZS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ftrunc, "FCVTZU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">;
+
 // f16 -> s16 conversions
 let Predicates = [HasFullFP16] in {
   def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 2053fc4..d50af11 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -957,23 +957,50 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     return TyL.first + ExtraCost;
   }
   case Intrinsic::get_active_lane_mask: {
-    auto *RetTy = dyn_cast<FixedVectorType>(ICA.getReturnType());
-    if (RetTy) {
-      EVT RetVT = getTLI()->getValueType(DL, RetTy);
-      EVT OpVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
-      if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
-          !getTLI()->isTypeLegal(RetVT)) {
-        // We don't have enough context at this point to determine if the mask
-        // is going to be kept live after the block, which will force the vXi1
-        // type to be expanded to legal vectors of integers, e.g. v4i1->v4i32.
-        // For now, we just assume the vectorizer created this intrinsic and
-        // the result will be the input for a PHI. In this case the cost will
-        // be extremely high for fixed-width vectors.
-        // NOTE: getScalarizationOverhead returns a cost that's far too
-        // pessimistic for the actual generated codegen. In reality there are
-        // two instructions generated per lane.
-        return RetTy->getNumElements() * 2;
+    auto RetTy = cast<VectorType>(ICA.getReturnType());
+    EVT RetVT = getTLI()->getValueType(DL, RetTy);
+    EVT OpVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
+    if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
+      break;
+
+    if (RetTy->isScalableTy()) {
+      if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
+          TargetLowering::TypeSplitVector)
+        break;
+
+      auto LT = getTypeLegalizationCost(RetTy);
+      InstructionCost Cost = LT.first;
+      // When SVE2p1 or SME2 is available, we can halve getTypeLegalizationCost
+      // as get_active_lane_mask may lower to the sve_whilelo_x2 intrinsic, e.g.
+      //   nxv32i1 = get_active_lane_mask(base, idx) ->
+      //    {nxv16i1, nxv16i1} = sve_whilelo_x2(base, idx)
+      if (ST->hasSVE2p1() || ST->hasSME2()) {
+        Cost /= 2;
+        if (Cost == 1)
+          return Cost;
       }
+
+      // If more than one whilelo intrinsic is required, include the extra cost
+      // required by the saturating add & select required to increment the
+      // start value after the first intrinsic call.
+      Type *OpTy = ICA.getArgTypes()[0];
+      IntrinsicCostAttributes AddAttrs(Intrinsic::uadd_sat, OpTy, {OpTy, OpTy});
+      InstructionCost SplitCost = getIntrinsicInstrCost(AddAttrs, CostKind);
+      Type *CondTy = OpTy->getWithNewBitWidth(1);
+      SplitCost += getCmpSelInstrCost(Instruction::Select, OpTy, CondTy,
+                                      CmpInst::ICMP_UGT, CostKind);
+      return Cost + (SplitCost * (Cost - 1));
+    } else if (!getTLI()->isTypeLegal(RetVT)) {
+      // We don't have enough context at this point to determine if the mask
+      // is going to be kept live after the block, which will force the vXi1
+      // type to be expanded to legal vectors of integers, e.g. v4i1->v4i32.
+      // For now, we just assume the vectorizer created this intrinsic and
+      // the result will be the input for a PHI. In this case the cost will
+      // be extremely high for fixed-width vectors.
+      // NOTE: getScalarizationOverhead returns a cost that's far too
+      // pessimistic for the actual generated codegen. In reality there are
+      // two instructions generated per lane.
+      return cast<FixedVectorType>(RetTy)->getNumElements() * 2;
     }
     break;
   }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 3e55b76..14b0f9a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5126,23 +5126,13 @@ bool AArch64InstructionSelector::selectShuffleVector(
     MachineInstr &I, MachineRegisterInfo &MRI) {
   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
   Register Src1Reg = I.getOperand(1).getReg();
-  const LLT Src1Ty = MRI.getType(Src1Reg);
   Register Src2Reg = I.getOperand(2).getReg();
-  const LLT Src2Ty = MRI.getType(Src2Reg);
   ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
 
   MachineBasicBlock &MBB = *I.getParent();
   MachineFunction &MF = *MBB.getParent();
   LLVMContext &Ctx = MF.getFunction().getContext();
 
-  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
-  // it's originated from a <1 x T> type. Those should have been lowered into
-  // G_BUILD_VECTOR earlier.
-  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
-    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
-    return false;
-  }
-
   unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
 
   SmallVector<Constant *, 64> CstIdxs;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 05a4313..5f93847 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1201,25 +1201,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
         return llvm::is_contained(
             {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
       })
-      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
-      // destinations, we just want those lowered into G_BUILD_VECTOR or
-      // G_EXTRACT_ELEMENT.
-      .lowerIf([=](const LegalityQuery &Query) {
-        return !Query.Types[0].isVector() || !Query.Types[1].isVector();
-      })
       .moreElementsIf(
           [](const LegalityQuery &Query) {
-            return Query.Types[0].isVector() && Query.Types[1].isVector() &&
-                   Query.Types[0].getNumElements() >
-                       Query.Types[1].getNumElements();
+            return Query.Types[0].getNumElements() >
+                   Query.Types[1].getNumElements();
           },
           changeTo(1, 0))
       .moreElementsToNextPow2(0)
       .moreElementsIf(
           [](const LegalityQuery &Query) {
-            return Query.Types[0].isVector() && Query.Types[1].isVector() &&
-                   Query.Types[0].getNumElements() <
-                       Query.Types[1].getNumElements();
+            return Query.Types[0].getNumElements() <
+                   Query.Types[1].getNumElements();
           },
           changeTo(0, 1))
       .widenScalarOrEltToNextPow2OrMinSize(0, 8)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 830a35bb..6d2d705 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -856,7 +856,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     break;
   }
   case TargetOpcode::G_FPTOSI_SAT:
-  case TargetOpcode::G_FPTOUI_SAT: {
+  case TargetOpcode::G_FPTOUI_SAT:
+  case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_FPTOUI: {
     LLT DstType = MRI.getType(MI.getOperand(0).getReg());
     if (DstType.isVector())
       break;
@@ -864,11 +866,19 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
       break;
     }
-    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
+    TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
+    TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
+    if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
+        all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+               [&](const MachineInstr &UseMI) {
+                 return onlyUsesFP(UseMI, MRI, TRI) ||
+                        prefersFPUse(UseMI, MRI, TRI);
+               }))
+      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+    else
+      OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
     break;
   }
-  case TargetOpcode::G_FPTOSI:
-  case TargetOpcode::G_FPTOUI:
   case TargetOpcode::G_INTRINSIC_LRINT:
   case TargetOpcode::G_INTRINSIC_LLRINT:
     if (MRI.getType(MI.getOperand(0).getReg()).isVector())
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8122db2..313ae3d 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -21381,15 +21381,6 @@ void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
   TargetLowering::insertSSPDeclarations(M);
 }
 
-Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
-  // MSVC CRT has a function to validate security cookie.
-  RTLIB::LibcallImpl SecurityCheckCookie =
-      getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
-  if (SecurityCheckCookie != RTLIB::Unsupported)
-    return M.getFunction(getLibcallImplName(SecurityCheckCookie));
-  return TargetLowering::getSSPStackGuardCheck(M);
-}
-
 bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                                   unsigned &Cost) const {
   // If we do not have NEON, vector types are not natively supported.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 8c5e0cf..357d2c5 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -708,7 +708,6 @@ class VectorType;
     bool useLoadStackGuardNode(const Module &M) const override;
 
     void insertSSPDeclarations(Module &M) const override;
-    Function *getSSPStackGuardCheck(const Module &M) const override;
 
     bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                    unsigned &Cost) const override;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4dfc400..410f20e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57617,10 +57617,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
   }
 
   // Fold any similar generic ADD/SUB opcodes to reuse this node.
-  auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
+  auto MatchGeneric = [&](unsigned Opc, SDValue N0, SDValue N1, bool Negate) {
     SDValue Ops[] = {N0, N1};
     SDVTList VTs = DAG.getVTList(N->getValueType(0));
-    if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
+    if (SDNode *GenericAddSub = DAG.getNodeIfExists(Opc, VTs, Ops)) {
       SDValue Op(N, 0);
       if (Negate) {
         // Bail if this is only used by a user of the x86 add/sub.
@@ -57632,8 +57632,25 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
       DCI.CombineTo(GenericAddSub, Op);
     }
   };
-  MatchGeneric(LHS, RHS, false);
-  MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
+  MatchGeneric(GenericOpc, LHS, RHS, false);
+  MatchGeneric(GenericOpc, RHS, LHS, X86ISD::SUB == N->getOpcode());
+
+  if (auto *Const = dyn_cast<ConstantSDNode>(RHS)) {
+    SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
+    if (X86ISD::SUB == N->getOpcode()) {
+      // Fold generic add(LHS, -C) to X86ISD::SUB(LHS, C).
+      MatchGeneric(ISD::ADD, LHS, NegC, false);
+    } else {
+      // Negate X86ISD::ADD(LHS, C) and replace generic sub(-C, LHS).
+      MatchGeneric(ISD::SUB, NegC, LHS, true);
+    }
+  } else if (auto *Const = dyn_cast<ConstantSDNode>(LHS)) {
+    if (X86ISD::SUB == N->getOpcode()) {
+      SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
+      // Negate X86ISD::SUB(C, RHS) and replace generic add(RHS, -C).
+      MatchGeneric(ISD::ADD, RHS, NegC, true);
+    }
+  }
 
   // TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the
   // EFLAGS result doesn't change.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index e28b9c1..b7151f6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1592,7 +1592,6 @@ namespace llvm {
     bool useLoadStackGuardNode(const Module &M) const override;
     bool useStackGuardXorFP() const override;
     void insertSSPDeclarations(Module &M) const override;
-    Function *getSSPStackGuardCheck(const Module &M) const override;
     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                 const SDLoc &DL) const override;
 
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 37d7772..a61bbe5 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -640,15 +640,6 @@ void X86TargetLowering::insertSSPDeclarations(Module &M) const {
   TargetLowering::insertSSPDeclarations(M);
 }
 
-Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
-  // MSVC CRT has a function to validate security cookie.
-  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
-      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
-    return M.getFunction("__security_check_cookie");
-  }
-  return TargetLowering::getSSPStackGuardCheck(M);
-}
-
 Value *
 X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
   // Android provides a fixed TLS slot for the SafeStack pointer. See the
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
index edcf247..632c6a2 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
@@ -1407,7 +1407,7 @@ let isBarrier = 1, isTerminator = 1 in {
     let r = 0x04;
   }
 
-  def BREAK_N : RRRN_Inst<0x0C, (outs), (ins uimm4:$imm),
+  def BREAK_N : RRRN_Inst<0x0D, (outs), (ins uimm4:$imm),
                          "break.n\t$imm", []>, Requires<[HasDensity, HasDebug]> {
     bits<4> imm;
 
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 67e2aae..9c8de45 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2327,6 +2327,18 @@ Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
   return ConstantVector::get(NewVecC);
 }
 
+// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
+static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
+                                            Constant *Splat, bool SplatLHS,
+                                            const DataLayout &DL) {
+  ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount();
+  Constant *LHS = ConstantVector::getSplat(EC, Splat);
+  Constant *RHS = Vector;
+  if (!SplatLHS)
+    std::swap(LHS, RHS);
+  return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
+}
+
 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   if (!isa<VectorType>(Inst.getType()))
     return nullptr;
@@ -2338,6 +2350,37 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   assert(cast<VectorType>(RHS->getType())->getElementCount() ==
          cast<VectorType>(Inst.getType())->getElementCount());
 
+  auto foldConstantsThroughSubVectorInsertSplat =
+      [&](Value *MaybeSubVector, Value *MaybeSplat,
+          bool SplatLHS) -> Instruction * {
+    Value *Idx;
+    Constant *Splat, *SubVector, *Dest;
+    if (!match(MaybeSplat, m_ConstantSplat(m_Constant(Splat))) ||
+        !match(MaybeSubVector,
+               m_VectorInsert(m_Constant(Dest), m_Constant(SubVector),
+                              m_Value(Idx))))
+      return nullptr;
+    SubVector =
+        constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
+    Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
+    if (!SubVector || !Dest)
+      return nullptr;
+    auto *InsertVector =
+        Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx);
+    return replaceInstUsesWith(Inst, InsertVector);
+  };
+
+  // If one operand is a constant splat and the other operand is a
+  // `vector.insert` where both the destination and subvector are constant,
+  // apply the operation to both the destination and subvector, returning a new
+  // constant `vector.insert`. This helps constant folding for scalable vectors.
+  if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
+          /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
+    return Folded;
+  if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
+          /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
+    return Folded;
+
   // If both operands of the binop are vector concatenations, then perform the
   // narrow binop on each pair of the source operands followed by concatenation
   // of the results.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c385c36..84817d7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -943,12 +943,40 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
   }
 }
 
+/// Get any instruction opcode or intrinsic ID data embedded in recipe \p R.
+/// Returns an optional pair, where the first element indicates whether it is
+/// an intrinsic ID.
+static std::optional<std::pair<bool, unsigned>>
+getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) {
+  return TypeSwitch<const VPSingleDefRecipe *,
+                    std::optional<std::pair<bool, unsigned>>>(R)
+      .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
+            VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
+          [](auto *I) { return std::make_pair(false, I->getOpcode()); })
+      .Case<VPWidenIntrinsicRecipe>([](auto *I) {
+        return std::make_pair(true, I->getVectorIntrinsicID());
+      })
+      .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
+        // For recipes that do not directly map to LLVM IR instructions,
+        // assign opcodes after the last VPInstruction opcode (which is also
+        // after the last IR Instruction opcode), based on the VPDefID.
+        return std::make_pair(false,
+                              VPInstruction::OpsEnd + 1 + I->getVPDefID());
+      })
+      .Default([](auto *) { return std::nullopt; });
+}
+
 /// Try to fold \p R using InstSimplifyFolder. Will succeed and return a
-/// non-nullptr Value for a handled \p Opcode if corresponding \p Operands are
-/// foldable live-ins.
-static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
-                               ArrayRef<VPValue *> Operands,
-                               const DataLayout &DL, VPTypeAnalysis &TypeInfo) {
+/// non-nullptr VPValue for a handled opcode or intrinsic ID if corresponding \p
+/// Operands are foldable live-ins.
+static VPValue *tryToFoldLiveIns(VPSingleDefRecipe &R,
+                                 ArrayRef<VPValue *> Operands,
+                                 const DataLayout &DL,
+                                 VPTypeAnalysis &TypeInfo) {
+  auto OpcodeOrIID = getOpcodeOrIntrinsicID(&R);
+  if (!OpcodeOrIID)
+    return nullptr;
+
   SmallVector<Value *, 4> Ops;
   for (VPValue *Op : Operands) {
     if (!Op->isLiveIn() || !Op->getLiveInIRValue())
@@ -956,43 +984,57 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
     Ops.push_back(Op->getLiveInIRValue());
   }
 
-  InstSimplifyFolder Folder(DL);
-  if (Instruction::isBinaryOp(Opcode))
-    return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0],
+  auto FoldToIRValue = [&]() -> Value * {
+    InstSimplifyFolder Folder(DL);
+    if (OpcodeOrIID->first) {
+      if (R.getNumOperands() != 2)
+        return nullptr;
+      unsigned ID = OpcodeOrIID->second;
+      return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
+                                        TypeInfo.inferScalarType(&R));
+    }
+    unsigned Opcode = OpcodeOrIID->second;
+    if (Instruction::isBinaryOp(Opcode))
+      return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode),
+                              Ops[0], Ops[1]);
+    if (Instruction::isCast(Opcode))
+      return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
+                             TypeInfo.inferScalarType(R.getVPSingleValue()));
+    switch (Opcode) {
+    case VPInstruction::LogicalAnd:
+      return Folder.FoldSelect(Ops[0], Ops[1],
+                               ConstantInt::getNullValue(Ops[1]->getType()));
+    case VPInstruction::Not:
+      return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
+                              Constant::getAllOnesValue(Ops[0]->getType()));
+    case Instruction::Select:
+      return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
+    case Instruction::ICmp:
+    case Instruction::FCmp:
+      return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
                             Ops[1]);
-  if (Instruction::isCast(Opcode))
-    return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
-                           TypeInfo.inferScalarType(R.getVPSingleValue()));
-  switch (Opcode) {
-  case VPInstruction::LogicalAnd:
-    return Folder.FoldSelect(Ops[0], Ops[1],
-                             ConstantInt::getNullValue(Ops[1]->getType()));
-  case VPInstruction::Not:
-    return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
-                            Constant::getAllOnesValue(Ops[0]->getType()));
-  case Instruction::Select:
-    return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
-  case Instruction::ICmp:
-  case Instruction::FCmp:
-    return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
-                          Ops[1]);
-  case Instruction::GetElementPtr: {
-    auto &RFlags = cast<VPRecipeWithIRFlags>(R);
-    auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
-    return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], drop_begin(Ops),
-                          RFlags.getGEPNoWrapFlags());
-  }
-  case VPInstruction::PtrAdd:
-  case VPInstruction::WidePtrAdd:
-    return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
-                          Ops[1],
-                          cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
-  // An extract of a live-in is an extract of a broadcast, so return the
-  // broadcasted element.
-  case Instruction::ExtractElement:
-    assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
-    return Ops[0];
-  }
+    case Instruction::GetElementPtr: {
+      auto &RFlags = cast<VPRecipeWithIRFlags>(R);
+      auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
+      return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
+                            drop_begin(Ops), RFlags.getGEPNoWrapFlags());
+    }
+    case VPInstruction::PtrAdd:
+    case VPInstruction::WidePtrAdd:
+      return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()),
+                            Ops[0], Ops[1],
+                            cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
+    // An extract of a live-in is an extract of a broadcast, so return the
+    // broadcasted element.
+    case Instruction::ExtractElement:
+      assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
+      return Ops[0];
+    }
+    return nullptr;
+  };
+
+  if (Value *V = FoldToIRValue())
+    return R.getParent()->getPlan()->getOrAddLiveIn(V);
   return nullptr;
 }
 
@@ -1006,19 +1048,10 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
 
   // Simplification of live-in IR values for SingleDef recipes using
   // InstSimplifyFolder.
-  if (TypeSwitch<VPRecipeBase *, bool>(&R)
-          .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
-                VPReplicateRecipe, VPWidenSelectRecipe>([&](auto *I) {
-            const DataLayout &DL =
-                Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
-            Value *V = tryToFoldLiveIns(*I, I->getOpcode(), I->operands(), DL,
-                                        TypeInfo);
-            if (V)
-              I->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
-            return V;
-          })
-          .Default([](auto *) { return false; }))
-    return;
+  const DataLayout &DL =
+      Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
+  if (VPValue *V = tryToFoldLiveIns(*Def, Def->operands(), DL, TypeInfo))
+    return Def->replaceAllUsesWith(V);
 
   // Fold PredPHI LiveIn -> LiveIn.
   if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) {
@@ -1996,29 +2029,6 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
     return Def == getEmptyKey() || Def == getTombstoneKey();
   }
 
-  /// Get any instruction opcode or intrinsic ID data embedded in recipe \p R.
-  /// Returns an optional pair, where the first element indicates whether it is
-  /// an intrinsic ID.
-  static std::optional<std::pair<bool, unsigned>>
-  getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) {
-    return TypeSwitch<const VPSingleDefRecipe *,
-                      std::optional<std::pair<bool, unsigned>>>(R)
-        .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
-              VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
-            [](auto *I) { return std::make_pair(false, I->getOpcode()); })
-        .Case<VPWidenIntrinsicRecipe>([](auto *I) {
-          return std::make_pair(true, I->getVectorIntrinsicID());
-        })
-        .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
-          // For recipes that do not directly map to LLVM IR instructions,
-          // assign opcodes after the last VPInstruction opcode (which is also
-          // after the last IR Instruction opcode), based on the VPDefID.
-          return std::make_pair(false,
-                                VPInstruction::OpsEnd + 1 + I->getVPDefID());
-        })
-        .Default([](auto *) { return std::nullopt; });
-  }
-
   /// If recipe \p R will lower to a GEP with a non-i8 source element type,
   /// return that source element type.
   static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index e007800..ac654dd 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-VSCALE-1
-; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-VSCALE-2
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefixes=CHECK-VSCALE-2,CHECK-SVE
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve2p1 | FileCheck %s --check-prefixes=CHECK-VSCALE-2,CHECK-SVE2p1-OR-SME2
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sme2 | FileCheck %s --check-prefixes=CHECK-VSCALE-2,CHECK-SVE2p1-OR-SME2
 ; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -intrinsic-cost-strategy=type-based-intrinsic-cost -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=TYPE_BASED_ONLY
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -920,7 +922,8 @@ define void @get_lane_mask() #0 {
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 13 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 5 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison)
@@ -934,28 +937,53 @@ define void @get_lane_mask() #0 {
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-; CHECK-VSCALE-2-LABEL: 'get_lane_mask'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-SVE-LABEL: 'get_lane_mask'
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 13 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 5 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison)
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-SVE2p1-OR-SME2-LABEL: 'get_lane_mask'
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 5 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'get_lane_mask'
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison)
@@ -966,7 +994,8 @@ define void @get_lane_mask() #0 {
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 13 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 5 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison)
@@ -990,6 +1019,7 @@ define void @get_lane_mask() #0 {
   %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
   %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
 
+  %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
   %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
   %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
 
@@ -1416,6 +1446,7 @@ declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32, i32)
 declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32, i32)
 declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32, i32)
 declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32, i32)
+declare <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64, i64)
 declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64, i64)
 declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16, i16)
 declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64, i64)
diff --git a/llvm/test/Analysis/CostModel/ARM/add-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/add-cast-vect.ll
index 1e9a5f7..c0410cf 100644
--- a/llvm/test/Analysis/CostModel/ARM/add-cast-vect.ll
+++ b/llvm/test/Analysis/CostModel/ARM/add-cast-vect.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s  -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+
 ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
 ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM
 
@@ -15,13 +17,18 @@ target triple = "armv7--linux-gnueabihf"
 %T464 = type <4 x i64>
 
 define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
-; COST: function 'direct'
+; COST-LABEL: 'direct'
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = add <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
   %v0 = load %T432, ptr %loadaddr
 ; ASM: vld1.64
   %v1 = load %T432, ptr %loadaddr2
 ; ASM: vld1.64
-  %r3 = add %T432 %v0, %v1 
-; COST: cost of 1 for instruction: {{.*}} add <4 x i32>
+  %r3 = add %T432 %v0, %v1
 ; ASM: vadd.i32
   store %T432 %r3, ptr %storeaddr
 ; ASM: vst1.64
@@ -29,16 +36,22 @@ define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 }
 
 define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
-; COST: function 'ups1632'
+; COST-LABEL: 'ups1632'
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r1 = sext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = add <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
   %v0 = load %T416, ptr %loadaddr
 ; ASM: vldr
   %v1 = load %T416, ptr %loadaddr2
 ; ASM: vldr
   %r1 = sext %T416 %v0 to %T432
   %r2 = sext %T416 %v1 to %T432
-; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
-  %r3 = add %T432 %r1, %r2 
-; COST: cost of 1 for instruction: {{.*}} add <4 x i32>
+  %r3 = add %T432 %r1, %r2
 ; ASM: vaddl.s16
   store %T432 %r3, ptr %storeaddr
 ; ASM: vst1.64
@@ -46,16 +59,22 @@ define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 }
 
 define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
-; COST: function 'upu1632'
+; COST-LABEL: 'upu1632'
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r2 = zext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = add <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
   %v0 = load %T416, ptr %loadaddr
 ; ASM: vldr
   %v1 = load %T416, ptr %loadaddr2
 ; ASM: vldr
   %r1 = zext %T416 %v0 to %T432
   %r2 = zext %T416 %v1 to %T432
-; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
-  %r3 = add %T432 %r1, %r2 
-; COST: cost of 1 for instruction: {{.*}} add <4 x i32>
+  %r3 = add %T432 %r1, %r2
 ; ASM: vaddl.u16
   store %T432 %r3, ptr %storeaddr
 ; ASM: vst1.64
@@ -63,51 +82,66 @@ define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 }
 
 define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
-; COST: function 'ups3264'
+; COST-LABEL: 'ups3264'
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = add <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: %st = sext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
   %v0 = load %T232, ptr %loadaddr
 ; ASM: vldr
   %v1 = load %T232, ptr %loadaddr2
 ; ASM: vldr
-  %r3 = add %T232 %v0, %v1 
+  %r3 = add %T232 %v0, %v1
 ; ASM: vadd.i32
-; COST: cost of 1 for instruction: {{.*}} add <2 x i32>
   %st = sext %T232 %r3 to %T264
 ; ASM: vmovl.s32
-; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
   store %T264 %st, ptr %storeaddr
 ; ASM: vst1.64
   ret void
 }
 
 define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
-; COST: function 'upu3264'
+; COST-LABEL: 'upu3264'
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = add <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: %st = zext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
   %v0 = load %T232, ptr %loadaddr
 ; ASM: vldr
   %v1 = load %T232, ptr %loadaddr2
 ; ASM: vldr
-  %r3 = add %T232 %v0, %v1 
+  %r3 = add %T232 %v0, %v1
 ; ASM: vadd.i32
-; COST: cost of 1 for instruction: {{.*}} add <2 x i32>
   %st = zext %T232 %r3 to %T264
 ; ASM: vmovl.u32
-; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
   store %T264 %st, ptr %storeaddr
 ; ASM: vst1.64
   ret void
 }
 
 define void @dn3216(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
-; COST: function 'dn3216'
+; COST-LABEL: 'dn3216'
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = add <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: %st = trunc <4 x i32> %r3 to <4 x i16>
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i16> %st, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
   %v0 = load %T432, ptr %loadaddr
 ; ASM: vld1.64
   %v1 = load %T432, ptr %loadaddr2
 ; ASM: vld1.64
-  %r3 = add %T432 %v0, %v1 
+  %r3 = add %T432 %v0, %v1
 ; ASM: vadd.i32
-; COST: cost of 1 for instruction: {{.*}} add <4 x i32>
   %st = trunc %T432 %r3 to %T416
 ; ASM: vmovn.i32
-; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
   store %T416 %st, ptr %storeaddr
 ; ASM: vstr
   ret void
diff --git a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll
index c2248c2..e5bbac6 100644
--- a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 < %s | FileCheck %s --check-prefix=CHECK-NEON
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK-V8R
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 < %s | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK-V8R
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
diff --git a/llvm/test/Analysis/CostModel/ARM/freeshift.ll b/llvm/test/Analysis/CostModel/ARM/freeshift.ll
index 51e87b5..cd5c8c5 100644
--- a/llvm/test/Analysis/CostModel/ARM/freeshift.ll
+++ b/llvm/test/Analysis/CostModel/ARM/freeshift.ll
@@ -1,23 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @shl(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'shl'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %as = shl i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ss = shl i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %xs = shl i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ns = shl i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %os = shl i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %is = shl i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %as = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ac = add i32 %b, %as
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %ss = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sc = sub i32 %b, %ss
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %xs = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %xc = xor i32 %b, %xs
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %ns = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nc = and i32 %b, %ns
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %os = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %oc = or i32 %b, %os
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %is = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ic = icmp eq i32 %b, %is
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
   %as = shl i32 %a, 3
   %ac = add i32 %b, %as
@@ -36,19 +36,19 @@ define void @shl(i32 %a, i32 %b) {
 
 define void @ashr(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'ashr'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %as = ashr i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ss = ashr i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %xs = ashr i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ns = ashr i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %os = ashr i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %is = ashr i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %as = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ac = add i32 %b, %as
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %ss = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sc = sub i32 %b, %ss
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %xs = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %xc = xor i32 %b, %xs
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %ns = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nc = and i32 %b, %ns
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %os = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %oc = or i32 %b, %os
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %is = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ic = icmp eq i32 %b, %is
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
   %as = ashr i32 %a, 3
   %ac = add i32 %b, %as
@@ -67,19 +67,19 @@ define void @ashr(i32 %a, i32 %b) {
 
 define void @lshr(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'lshr'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %as = lshr i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ss = lshr i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %xs = lshr i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ns = lshr i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %os = lshr i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %is = lshr i32 %a, 3
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %as = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ac = add i32 %b, %as
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %ss = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sc = sub i32 %b, %ss
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %xs = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %xc = xor i32 %b, %xs
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %ns = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nc = and i32 %b, %ns
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %os = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %oc = or i32 %b, %os
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %is = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ic = icmp eq i32 %b, %is
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
   %as = lshr i32 %a, 3
   %ac = add i32 %b, %as
diff --git a/llvm/test/Analysis/CostModel/ARM/gep.ll b/llvm/test/Analysis/CostModel/ARM/gep.ll
index 48de193..cce87a5 100644
--- a/llvm/test/Analysis/CostModel/ARM/gep.ll
+++ b/llvm/test/Analysis/CostModel/ARM/gep.ll
@@ -1,98 +1,98 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V6M
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-V7M-NOFP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-V7M-FP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVEFP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-T32
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=arm-none-eabi -mcpu=cortex-a53 < %s | FileCheck %s --check-prefix=CHECK-A32
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V6M
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-V7M-NOFP
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-V7M-FP
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVEFP
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-T32
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=arm-none-eabi -mcpu=cortex-a53 < %s | FileCheck %s --check-prefix=CHECK-A32
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @testi8(ptr %a, i32 %i) {
 ; CHECK-V6M-LABEL: 'testi8'
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-NOFP-LABEL: 'testi8'
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-FP-LABEL: 'testi8'
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'testi8'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'testi8'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-T32-LABEL: 'testi8'
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-A32-LABEL: 'testi8'
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a1 = getelementptr inbounds i8, ptr %a, i32 1
   %am4 = getelementptr inbounds i8, ptr %a, i32 -1
@@ -109,88 +109,88 @@ define void @testi8(ptr %a, i32 %i) {
 
 define void @testi16(ptr %a, i32 %i) {
 ; CHECK-V6M-LABEL: 'testi16'
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-NOFP-LABEL: 'testi16'
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-FP-LABEL: 'testi16'
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'testi16'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'testi16'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-T32-LABEL: 'testi16'
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-A32-LABEL: 'testi16'
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a1 = getelementptr inbounds i16, ptr %a, i32 1
   %am4 = getelementptr inbounds i16, ptr %a, i32 -1
@@ -207,88 +207,88 @@ define void @testi16(ptr %a, i32 %i) {
 
 define void @testi32(ptr %a, i32 %i) {
 ; CHECK-V6M-LABEL: 'testi32'
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-NOFP-LABEL: 'testi32'
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-FP-LABEL: 'testi32'
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'testi32'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'testi32'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-T32-LABEL: 'testi32'
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-A32-LABEL: 'testi32'
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a1 = getelementptr inbounds i32, ptr %a, i32 1
   %am4 = getelementptr inbounds i32, ptr %a, i32 -1
@@ -305,102 +305,102 @@ define void @testi32(ptr %a, i32 %i) {
 
 define void @testi64(ptr %a, i32 %i) {
 ; CHECK-V6M-LABEL: 'testi64'
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-NOFP-LABEL: 'testi64'
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-FP-LABEL: 'testi64'
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'testi64'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'testi64'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-T32-LABEL: 'testi64'
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-A32-LABEL: 'testi64'
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a1 = getelementptr inbounds i64, ptr %a, i32 1
   %am4 = getelementptr inbounds i64, ptr %a, i32 -1
@@ -419,102 +419,102 @@ define void @testi64(ptr %a, i32 %i) {
 
 define void @testhalf(ptr %a, i32 %i) {
 ; CHECK-V6M-LABEL: 'testhalf'
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-NOFP-LABEL: 'testhalf'
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-FP-LABEL: 'testhalf'
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'testhalf'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'testhalf'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-T32-LABEL: 'testhalf'
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-A32-LABEL: 'testhalf'
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a1 = getelementptr inbounds half, ptr %a, i32 1
   %am1 = getelementptr inbounds half, ptr %a, i32 -1
@@ -533,102 +533,102 @@ define void @testhalf(ptr %a, i32 %i) {
 
 define void @testfloat(ptr %a, i32 %i) {
 ; CHECK-V6M-LABEL: 'testfloat'
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-NOFP-LABEL: 'testfloat'
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-FP-LABEL: 'testfloat'
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'testfloat'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'testfloat'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-T32-LABEL: 'testfloat'
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-A32-LABEL: 'testfloat'
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a1 = getelementptr inbounds float, ptr %a, i32 1
   %am1 = getelementptr inbounds float, ptr %a, i32 -1
@@ -647,102 +647,102 @@ define void @testfloat(ptr %a, i32 %i) {
 
 define void @testdouble(ptr %a, i32 %i) {
 ; CHECK-V6M-LABEL: 'testdouble'
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-NOFP-LABEL: 'testdouble'
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-FP-LABEL: 'testdouble'
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'testdouble'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'testdouble'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-T32-LABEL: 'testdouble'
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-A32-LABEL: 'testdouble'
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a1 = getelementptr inbounds double, ptr %a, i32 1
   %am1 = getelementptr inbounds double, ptr %a, i32 -1
@@ -761,375 +761,375 @@ define void @testdouble(ptr %a, i32 %i) {
 
 define void @testvecs(i32 %i) {
 ; CHECK-V6M-LABEL: 'testvecs'
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
-; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 0 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
+; CHECK-V6M-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-NOFP-LABEL: 'testvecs'
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
-; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 0 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V7M-FP-LABEL: 'testvecs'
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
-; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 0 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'testvecs'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'testvecs'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 0 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-T32-LABEL: 'testvecs'
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-T32-NEXT:  Cost Model: Found costs of 0 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
+; CHECK-T32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-A32-LABEL: 'testvecs'
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-A32-NEXT:  Cost Model: Found costs of 0 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
+; CHECK-A32-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 
   %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
diff --git a/llvm/test/Analysis/CostModel/ARM/immediates.ll b/llvm/test/Analysis/CostModel/ARM/immediates.ll
index ed13636..cd42313 100644
--- a/llvm/test/Analysis/CostModel/ARM/immediates.ll
+++ b/llvm/test/Analysis/CostModel/ARM/immediates.ll
@@ -1,145 +1,53 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size  -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1-SIZE
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size  -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-T2-SIZE
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency    -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1-LATENCY
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency    -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-T2-LATENCY
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1-THROUGHPUT
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-T2-THROUGHPUT
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output  -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output  -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-T2
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @const_costs() {
-; CHECK-T1-SIZE-LABEL: 'const_costs'
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 1
+; CHECK-T1-LABEL: 'const_costs'
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %add_1 = add i32 undef, 1
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %add_32767 = add i32 undef, 32767
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %sub_1 = sub i32 undef, 1
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %sub_32768 = sub i32 undef, 32768
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %mul_2 = mul i32 undef, 2
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %mul_3 = mul i32 undef, 3
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %mul_27 = mul i32 undef, 27
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %and_255 = and i32 undef, 255
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %and_65535 = and i32 undef, 65535
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %and_1 = and i32 undef, 1
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %xor_1 = xor i32 undef, 1
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %xor_7 = xor i32 undef, 7
+; CHECK-T1-NEXT:  Cost Model: Found costs of 0 for: %gep_1 = getelementptr i32, ptr undef, i32 1
+; CHECK-T1-NEXT:  Cost Model: Found costs of 0 for: %gep_16 = getelementptr i32, ptr undef, i32 16
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %cmp_244 = icmp ne i32 undef, 244
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %cmp_256 = icmp uge i32 undef, 256
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %cmp_1024 = icmp ult i32 undef, 1024
+; CHECK-T1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %select_1_0 = select i1 undef, i32 1, i32 0
+; CHECK-T1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %select_7_255 = select i1 undef, i32 7, i32 255
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: ret i32 1
 ;
-; CHECK-T2-SIZE-LABEL: 'const_costs'
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255
-; CHECK-T2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 1
-;
-; CHECK-T1-LATENCY-LABEL: 'const_costs'
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 1
-;
-; CHECK-T2-LATENCY-LABEL: 'const_costs'
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255
-; CHECK-T2-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 1
-;
-; CHECK-T1-THROUGHPUT-LABEL: 'const_costs'
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 1
-;
-; CHECK-T2-THROUGHPUT-LABEL: 'const_costs'
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255
-; CHECK-T2-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 1
+; CHECK-T2-LABEL: 'const_costs'
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %add_1 = add i32 undef, 1
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %add_32767 = add i32 undef, 32767
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %sub_1 = sub i32 undef, 1
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %sub_32768 = sub i32 undef, 32768
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %mul_2 = mul i32 undef, 2
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %mul_3 = mul i32 undef, 3
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %mul_27 = mul i32 undef, 27
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %and_255 = and i32 undef, 255
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %and_65535 = and i32 undef, 65535
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %and_1 = and i32 undef, 1
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %xor_1 = xor i32 undef, 1
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %xor_7 = xor i32 undef, 7
+; CHECK-T2-NEXT:  Cost Model: Found costs of 0 for: %gep_1 = getelementptr i32, ptr undef, i32 1
+; CHECK-T2-NEXT:  Cost Model: Found costs of 0 for: %gep_16 = getelementptr i32, ptr undef, i32 16
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %cmp_244 = icmp ne i32 undef, 244
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %cmp_256 = icmp uge i32 undef, 256
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: %cmp_1024 = icmp ult i32 undef, 1024
+; CHECK-T2-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %select_1_0 = select i1 undef, i32 1, i32 0
+; CHECK-T2-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %select_7_255 = select i1 undef, i32 7, i32 255
+; CHECK-T2-NEXT:  Cost Model: Found costs of 1 for: ret i32 1
 ;
   %add_1 = add i32 undef, 1
   %add_32767 = add i32 undef, 32767
diff --git a/llvm/test/Analysis/CostModel/ARM/insertelement.ll b/llvm/test/Analysis/CostModel/ARM/insertelement.ll
index 5a922dd..f14b200 100644
--- a/llvm/test/Analysis/CostModel/ARM/insertelement.ll
+++ b/llvm/test/Analysis/CostModel/ARM/insertelement.ll
@@ -1,4 +1,5 @@
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios6.0.0"
@@ -7,12 +8,16 @@ target triple = "thumbv7-apple-ios6.0.0"
 ; due to renaming constraints.
 %T_i8v = type <8 x i8>
 %T_i8 = type i8
-; CHECK: insertelement_i8
-define void @insertelement_i8(ptr %saddr,
-                           ptr %vaddr) {
+define void @insertelement_i8(ptr %saddr, ptr %vaddr) {
+; CHECK-LABEL: 'insertelement_i8'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <8 x i8>, ptr %vaddr, align 4
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i8, ptr %saddr, align 1
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2 = insertelement <8 x i8> %v0, i8 %v1, i32 1
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: store <8 x i8> %v2, ptr %vaddr, align 4
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
   %v0 = load %T_i8v, ptr %vaddr
   %v1 = load %T_i8, ptr %saddr
-;CHECK: estimated cost of 3 for {{.*}} insertelement <8 x i8>
   %v2 = insertelement %T_i8v %v0, %T_i8 %v1, i32 1
   store %T_i8v %v2, ptr %vaddr
   ret void
@@ -21,12 +26,16 @@ define void @insertelement_i8(ptr %saddr,
 
 %T_i16v = type <4 x i16>
 %T_i16 = type i16
-; CHECK: insertelement_i16
-define void @insertelement_i16(ptr %saddr,
-                           ptr %vaddr) {
+define void @insertelement_i16(ptr %saddr, ptr %vaddr) {
+; CHECK-LABEL: 'insertelement_i16'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %vaddr, align 4
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i16, ptr %saddr, align 2
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2 = insertelement <4 x i16> %v0, i16 %v1, i32 1
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: store <4 x i16> %v2, ptr %vaddr, align 4
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
   %v0 = load %T_i16v, ptr %vaddr
   %v1 = load %T_i16, ptr %saddr
-;CHECK: estimated cost of 3 for {{.*}} insertelement <4 x i16>
   %v2 = insertelement %T_i16v %v0, %T_i16 %v1, i32 1
   store %T_i16v %v2, ptr %vaddr
   ret void
@@ -34,12 +43,16 @@ define void @insertelement_i16(ptr %saddr,
 
 %T_i32v = type <2 x i32>
 %T_i32 = type i32
-; CHECK: insertelement_i32
-define void @insertelement_i32(ptr %saddr,
-                           ptr %vaddr) {
+define void @insertelement_i32(ptr %saddr, ptr %vaddr) {
+; CHECK-LABEL: 'insertelement_i32'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %vaddr, align 4
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i32, ptr %saddr, align 4
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2 = insertelement <2 x i32> %v0, i32 %v1, i32 1
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: store <2 x i32> %v2, ptr %vaddr, align 4
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
   %v0 = load %T_i32v, ptr %vaddr
   %v1 = load %T_i32, ptr %saddr
-;CHECK: estimated cost of 3 for {{.*}} insertelement <2 x i32>
   %v2 = insertelement %T_i32v %v0, %T_i32 %v1, i32 1
   store %T_i32v %v2, ptr %vaddr
   ret void
diff --git a/llvm/test/Analysis/CostModel/ARM/load-to-trunc.ll b/llvm/test/Analysis/CostModel/ARM/load-to-trunc.ll
index 4404209..c98601f 100644
--- a/llvm/test/Analysis/CostModel/ARM/load-to-trunc.ll
+++ b/llvm/test/Analysis/CostModel/ARM/load-to-trunc.ll
@@ -1,17 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8r-none-eabi < %s | FileCheck %s
+
 ; Check memory cost model action for a load of an unusually sized integer
 ; follow by and a trunc to a register sized integer gives a cost of 1 rather
 ; than the expanded cost if it is not.  Currently, this target does not have
 ; that expansion.
 
-; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK
-
 ; Check that cost is 1 for unusual load to register sized load.
 define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
 ; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %out = load i128, ptr %ptr, align 8
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc = trunc i128 %out to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: ret i32 %trunc
 ;
   %out = load i128, ptr %ptr
   %trunc = trunc i128 %out to i32
@@ -20,8 +20,8 @@ define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
 
 define i128 @loadUnusualInteger(ptr %ptr) {
 ; CHECK-LABEL: 'loadUnusualInteger'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %out = load i128, ptr %ptr, align 8
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: ret i128 %out
 ;
   %out = load i128, ptr %ptr
   ret i128 %out
diff --git a/llvm/test/Analysis/CostModel/ARM/load_store.ll b/llvm/test/Analysis/CostModel/ARM/load_store.ll
index 4c322e9..dd2eaae 100644
--- a/llvm/test/Analysis/CostModel/ARM/load_store.ll
+++ b/llvm/test/Analysis/CostModel/ARM/load_store.ll
@@ -1,171 +1,117 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECK-NOVEC
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-NOVEC
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-FP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-NEON
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=arm-none-eabi -mcpu=cortex-a53 < %s | FileCheck %s --check-prefix=CHECK-NEON
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=CHECK-V8-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE-SIZE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECK-NOVEC
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-NOVEC
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-FP
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=arm-none-eabi -mcpu=cortex-a53 < %s | FileCheck %s --check-prefix=CHECK-NEON
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @stores() {
 ; CHECK-NOVEC-LABEL: 'stores'
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store double undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> undef, ptr undef, align 2
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, ptr undef, align 2
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> undef, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x double> undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of 1 for: store i8 undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of 1 for: store i16 undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of 1 for: store i32 undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store i64 undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store i128 undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of 1 for: store float undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store double undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i8> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i16> undef, ptr undef, align 2
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i32> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i64> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> undef, ptr undef, align 2
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i8> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x float> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x double> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x float> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i64> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x float> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-FP-LABEL: 'stores'
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> undef, ptr undef, align 2
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, ptr undef, align 2
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> undef, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x double> undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-FP-NEXT:  Cost Model: Found costs of 1 for: store i8 undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of 1 for: store i16 undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of 1 for: store i32 undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store i64 undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store i128 undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of 1 for: store float undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of 1 for: store double undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i8> undef, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i16> undef, ptr undef, align 2
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i32> undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i64> undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> undef, ptr undef, align 2
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i8> undef, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x float> undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x double> undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x float> undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i64> undef, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> undef, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> undef, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x float> undef, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'stores'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> undef, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> undef, ptr undef, align 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: store <2 x i32> undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i64> undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, ptr undef, align 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> undef, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x double> undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <2 x float> undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i64> undef, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> undef, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: store i8 undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: store i16 undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: store i32 undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store i64 undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store i128 undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: store float undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: store double undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i8> undef, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i16> undef, ptr undef, align 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i32> undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i64> undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> undef, ptr undef, align 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i8> undef, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x float> undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x double> undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x float> undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i64> undef, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> undef, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> undef, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x float> undef, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-NEON-LABEL: 'stores'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> undef, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> undef, ptr undef, align 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x double> undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; CHECK-V8-SIZE-LABEL: 'stores'
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i128 undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 2
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 2
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-MVE-SIZE-LABEL: 'stores'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i128 undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 2
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 2
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store i8 undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store i16 undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store i32 undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store i64 undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store i128 undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store float undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store double undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i8> undef, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i16> undef, ptr undef, align 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store <2 x i32> undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store <2 x i64> undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store <8 x i16> undef, ptr undef, align 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store <16 x i8> undef, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store <4 x float> undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x double> undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store <2 x float> undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store <2 x i64> undef, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> undef, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store <8 x i16> undef, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store <4 x float> undef, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   store i8 undef, ptr undef, align 4
   store i16 undef, ptr undef, align 4
@@ -199,160 +145,108 @@ define void @stores() {
 
 define void @loads() {
 ; CHECK-NOVEC-LABEL: 'loads'
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = load double, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = load <2 x i8>, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = load <2 x i16>, ptr undef, align 2
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = load <2 x i32>, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = load <2 x i64>, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %12 = load <4 x i32>, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %13 = load <8 x i16>, ptr undef, align 2
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %14 = load <16 x i8>, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = load <4 x float>, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %16 = load <4 x double>, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = load <2 x float>, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = load <2 x double>, ptr undef, align 4
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = load <2 x i64>, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = load <4 x i32>, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %21 = load <8 x i16>, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %22 = load <4 x float>, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x double>, ptr undef, align 1
-; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %1 = load i8, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %2 = load i16, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %3 = load i32, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %4 = load i64, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %5 = load i128, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %6 = load float, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %7 = load double, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %8 = load <2 x i8>, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %9 = load <2 x i16>, ptr undef, align 2
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %10 = load <2 x i32>, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %11 = load <2 x i64>, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %12 = load <4 x i32>, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %13 = load <8 x i16>, ptr undef, align 2
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %14 = load <16 x i8>, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %15 = load <4 x float>, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %16 = load <4 x double>, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %17 = load <2 x float>, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %18 = load <2 x double>, ptr undef, align 4
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %19 = load <2 x i64>, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %20 = load <4 x i32>, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %21 = load <8 x i16>, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %22 = load <4 x float>, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %23 = load <2 x double>, ptr undef, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-FP-LABEL: 'loads'
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = load <2 x i8>, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = load <2 x i16>, ptr undef, align 2
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = load <2 x i32>, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = load <2 x i64>, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %12 = load <4 x i32>, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %13 = load <8 x i16>, ptr undef, align 2
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %14 = load <16 x i8>, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = load <4 x float>, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %16 = load <4 x double>, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = load <2 x float>, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = load <2 x double>, ptr undef, align 4
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = load <2 x i64>, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = load <4 x i32>, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %21 = load <8 x i16>, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %22 = load <4 x float>, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = load <2 x double>, ptr undef, align 1
-; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %1 = load i8, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %2 = load i16, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %3 = load i32, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %4 = load i64, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %5 = load i128, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %6 = load float, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %7 = load double, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %8 = load <2 x i8>, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %9 = load <2 x i16>, ptr undef, align 2
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %10 = load <2 x i32>, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %11 = load <2 x i64>, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %12 = load <4 x i32>, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %13 = load <8 x i16>, ptr undef, align 2
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %14 = load <16 x i8>, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %15 = load <4 x float>, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %16 = load <4 x double>, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %17 = load <2 x float>, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %18 = load <2 x double>, ptr undef, align 4
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %19 = load <2 x i64>, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %20 = load <4 x i32>, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %21 = load <8 x i16>, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %22 = load <4 x float>, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %23 = load <2 x double>, ptr undef, align 1
+; CHECK-FP-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'loads'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %8 = load <2 x i8>, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %9 = load <2 x i16>, ptr undef, align 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %10 = load <2 x i32>, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = load <2 x i64>, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = load <4 x i32>, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = load <8 x i16>, ptr undef, align 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = load <16 x i8>, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <4 x float>, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %16 = load <4 x double>, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %17 = load <2 x float>, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = load <2 x double>, ptr undef, align 4
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <2 x i64>, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <4 x i32>, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i16>, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x float>, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = load <2 x double>, ptr undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %1 = load i8, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %2 = load i16, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %3 = load i32, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %4 = load i64, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %5 = load i128, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %6 = load float, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %7 = load double, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:4 SizeLat:1 for: %8 = load <2 x i8>, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:4 SizeLat:1 for: %9 = load <2 x i16>, ptr undef, align 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:4 SizeLat:1 for: %10 = load <2 x i32>, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %11 = load <2 x i64>, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %12 = load <4 x i32>, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %13 = load <8 x i16>, ptr undef, align 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %14 = load <16 x i8>, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %15 = load <4 x float>, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %16 = load <4 x double>, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:4 SizeLat:1 for: %17 = load <2 x float>, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %18 = load <2 x double>, ptr undef, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %19 = load <2 x i64>, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %20 = load <4 x i32>, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %21 = load <8 x i16>, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %22 = load <4 x float>, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %23 = load <2 x double>, ptr undef, align 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-NEON-LABEL: 'loads'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, ptr undef, align 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, ptr undef, align 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %16 = load <4 x double>, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = load <2 x double>, ptr undef, align 4
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x double>, ptr undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; CHECK-V8-SIZE-LABEL: 'loads'
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = load i128, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, ptr undef, align 2
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, ptr undef, align 2
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x double>, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x double>, ptr undef, align 4
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = load <2 x double>, ptr undef, align 1
-; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-MVE-SIZE-LABEL: 'loads'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = load i128, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, ptr undef, align 2
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, ptr undef, align 2
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x double>, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x double>, ptr undef, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = load <2 x double>, ptr undef, align 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %1 = load i8, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %2 = load i16, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %3 = load i32, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %4 = load i64, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %5 = load i128, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %6 = load float, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %7 = load double, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %8 = load <2 x i8>, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %9 = load <2 x i16>, ptr undef, align 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %10 = load <2 x i32>, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %11 = load <2 x i64>, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %12 = load <4 x i32>, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %13 = load <8 x i16>, ptr undef, align 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %14 = load <16 x i8>, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %15 = load <4 x float>, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %16 = load <4 x double>, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %17 = load <2 x float>, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %18 = load <2 x double>, ptr undef, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %19 = load <2 x i64>, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %20 = load <4 x i32>, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %21 = load <8 x i16>, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %22 = load <4 x float>, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %23 = load <2 x double>, ptr undef, align 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   load i8, ptr undef, align 4
   load i16, ptr undef, align 4
diff --git a/llvm/test/Analysis/CostModel/ARM/logicalop.ll b/llvm/test/Analysis/CostModel/ARM/logicalop.ll
index 967426c..82eb716 100644
--- a/llvm/test/Analysis/CostModel/ARM/logicalop.ll
+++ b/llvm/test/Analysis/CostModel/ARM/logicalop.ll
@@ -1,72 +1,40 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE-RECIP
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON-RECIP
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1-RECIP
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2-RECIP
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE-SIZE
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON-SIZE
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1-SIZE
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2-SIZE
+; RUN: opt < %s  -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE
+; RUN: opt < %s  -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: opt < %s  -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1
+; RUN: opt < %s  -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @op() {
   ; Logical and/or - select's cost must be equivalent to that of binop
-; CHECK-MVE-RECIP-LABEL: 'op'
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select i1 undef, i1 undef, i1 false
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and i1 undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select i1 undef, i1 true, i1 undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or i1 undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; CHECK-NEON-RECIP-LABEL: 'op'
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select i1 undef, i1 undef, i1 false
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and i1 undef, undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select i1 undef, i1 true, i1 undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or i1 undef, undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; CHECK-THUMB1-RECIP-LABEL: 'op'
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select i1 undef, i1 undef, i1 false
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and i1 undef, undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select i1 undef, i1 true, i1 undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or i1 undef, undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-THUMB2-RECIP-LABEL: 'op'
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select i1 undef, i1 undef, i1 false
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and i1 undef, undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select i1 undef, i1 true, i1 undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or i1 undef, undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-MVE-SIZE-LABEL: 'op'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sand = select i1 undef, i1 undef, i1 false
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %band = and i1 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sor = select i1 undef, i1 true, i1 undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %bor = or i1 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-NEON-SIZE-LABEL: 'op'
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sand = select i1 undef, i1 undef, i1 false
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %band = and i1 undef, undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sor = select i1 undef, i1 true, i1 undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %bor = or i1 undef, undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-THUMB1-SIZE-LABEL: 'op'
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sand = select i1 undef, i1 undef, i1 false
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %band = and i1 undef, undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sor = select i1 undef, i1 true, i1 undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %bor = or i1 undef, undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-THUMB2-SIZE-LABEL: 'op'
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sand = select i1 undef, i1 undef, i1 false
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %band = and i1 undef, undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sor = select i1 undef, i1 true, i1 undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %bor = or i1 undef, undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-LABEL: 'op'
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %sand = select i1 undef, i1 undef, i1 false
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %band = and i1 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %sor = select i1 undef, i1 true, i1 undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %bor = or i1 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-NEON-LABEL: 'op'
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %sand = select i1 undef, i1 undef, i1 false
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %band = and i1 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %sor = select i1 undef, i1 true, i1 undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %bor = or i1 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-THUMB1-LABEL: 'op'
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %sand = select i1 undef, i1 undef, i1 false
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %band = and i1 undef, undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %sor = select i1 undef, i1 true, i1 undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %bor = or i1 undef, undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of 1 for: ret void
+;
+; CHECK-THUMB2-LABEL: 'op'
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %sand = select i1 undef, i1 undef, i1 false
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %band = and i1 undef, undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %sor = select i1 undef, i1 true, i1 undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %bor = or i1 undef, undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
   %sand = select i1 undef, i1 undef, i1 false
   %band = and i1 undef, undef
@@ -77,61 +45,33 @@ define void @op() {
 }
 
 define void @vecop() {
-; CHECK-MVE-RECIP-LABEL: 'vecop'
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %bor = or <4 x i1> undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; CHECK-NEON-RECIP-LABEL: 'vecop'
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; CHECK-THUMB1-RECIP-LABEL: 'vecop'
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-THUMB2-RECIP-LABEL: 'vecop'
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-MVE-SIZE-LABEL: 'vecop'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-NEON-SIZE-LABEL: 'vecop'
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-THUMB1-SIZE-LABEL: 'vecop'
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-THUMB2-SIZE-LABEL: 'vecop'
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-LABEL: 'vecop'
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %band = and <4 x i1> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %bor = or <4 x i1> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-NEON-LABEL: 'vecop'
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %band = and <4 x i1> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %bor = or <4 x i1> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-THUMB1-LABEL: 'vecop'
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of 4 for: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of 4 for: %band = and <4 x i1> undef, undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of 4 for: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of 4 for: %bor = or <4 x i1> undef, undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of 1 for: ret void
+;
+; CHECK-THUMB2-LABEL: 'vecop'
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of 4 for: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of 4 for: %band = and <4 x i1> undef, undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of 4 for: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of 4 for: %bor = or <4 x i1> undef, undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
   %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> <i1 false, i1 false, i1 false, i1 false>
   %band = and <4 x i1> undef, undef
diff --git a/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll
index 17d4263..07d2bd0 100644
--- a/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+; RUN: opt < %s  -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+
 ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
 ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM
 
@@ -17,11 +18,11 @@ target triple = "armv7--linux-gnueabihf"
 
 define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'direct'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %v0, %v1
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = mul <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T432, ptr %loadaddr
 ; ASM: vld1.64
@@ -36,13 +37,13 @@ define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'ups1632'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32>
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32>
-; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %r1, %r2
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r1 = sext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = mul <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T416, ptr %loadaddr
 ; ASM: vldr
@@ -59,13 +60,13 @@ define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'upu1632'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32>
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32>
-; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %r1, %r2
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r2 = zext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = mul <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T416, ptr %loadaddr
 ; ASM: vldr
@@ -82,12 +83,12 @@ define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'ups3264'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = mul <2 x i32> %v0, %v1
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64>
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = mul <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: %st = sext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T232, ptr %loadaddr
 ; ASM: vldr
@@ -104,12 +105,12 @@ define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'upu3264'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = mul <2 x i32> %v0, %v1
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64>
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = mul <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: %st = zext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T232, ptr %loadaddr
 ; ASM: vldr
@@ -126,12 +127,12 @@ define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @dn3216(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'dn3216'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %v0, %v1
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16>
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = mul <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: %st = trunc <4 x i32> %r3 to <4 x i16>
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i16> %st, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T432, ptr %loadaddr
 ; ASM: vld1.64
diff --git a/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll b/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll
index 7de2799..3ae02cd 100644
--- a/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll
+++ b/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll
@@ -1,21 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp  < %s | FileCheck %s
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp  < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP
 
 define i64 @test(i16 %a, i16 %b) {
 ; CHECK-LABEL: 'test'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %as = sext i16 %a to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bs = sext i16 %b to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %m = mul i32 %as, %bs
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: ret i64 %ms
 ;
 ; CHECK-NO-DSP-LABEL: 'test'
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %as = sext i16 %a to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %bs = sext i16 %b to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: ret i64 %ms
 ;
     %as = sext i16 %a to i32
     %bs = sext i16 %b to i32
@@ -26,20 +26,20 @@ define i64 @test(i16 %a, i16 %b) {
 
 define i64 @withadd(i16 %a, i16 %b, i64 %c) {
 ; CHECK-LABEL: 'withadd'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %as = sext i16 %a to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bs = sext i16 %b to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %m = mul i32 %as, %bs
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = add i64 %c, %ms
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: ret i64 %r
 ;
 ; CHECK-NO-DSP-LABEL: 'withadd'
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %as = sext i16 %a to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %bs = sext i16 %b to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 2 for: %r = add i64 %c, %ms
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: ret i64 %r
 ;
     %as = sext i16 %a to i32
     %bs = sext i16 %b to i32
@@ -51,24 +51,24 @@ define i64 @withadd(i16 %a, i16 %b, i64 %c) {
 
 define i64 @withloads(ptr %pa, ptr %pb, i64 %c) {
 ; CHECK-LABEL: 'withloads'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %as = sext i16 %a to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bs = sext i16 %b to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %a = load i16, ptr %pa, align 2
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %b = load i16, ptr %pb, align 2
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %as = sext i16 %a to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %bs = sext i16 %b to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %m = mul i32 %as, %bs
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = add i64 %c, %ms
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: ret i64 %r
 ;
 ; CHECK-NO-DSP-LABEL: 'withloads'
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %as = sext i16 %a to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bs = sext i16 %b to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %a = load i16, ptr %pa, align 2
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %b = load i16, ptr %pb, align 2
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 0 for: %as = sext i16 %a to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 0 for: %bs = sext i16 %b to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 2 for: %r = add i64 %c, %ms
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: ret i64 %r
 ;
     %a = load i16, ptr %pa
     %b = load i16, ptr %pb
@@ -82,18 +82,18 @@ define i64 @withloads(ptr %pa, ptr %pb, i64 %c) {
 
 define i64 @different_extend_ops(i16 %a, i16 %b) {
 ; CHECK-LABEL: 'different_extend_ops'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %as = sext i16 %a to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bs = zext i16 %b to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: ret i64 %ms
 ;
 ; CHECK-NO-DSP-LABEL: 'different_extend_ops'
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %as = sext i16 %a to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %bs = zext i16 %b to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: ret i64 %ms
 ;
     %as = sext i16 %a to i32
     %bs = zext i16 %b to i32
diff --git a/llvm/test/Analysis/CostModel/ARM/muls-in-umull-patterns.ll b/llvm/test/Analysis/CostModel/ARM/muls-in-umull-patterns.ll
index 521816d13..04a9520 100644
--- a/llvm/test/Analysis/CostModel/ARM/muls-in-umull-patterns.ll
+++ b/llvm/test/Analysis/CostModel/ARM/muls-in-umull-patterns.ll
@@ -1,20 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp  < %s | FileCheck %s
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp  < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP
+
 define i64 @test(i16 %a, i16 %b) {
 ; CHECK-LABEL: 'test'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = zext i16 %a to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %as = zext i16 %a to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bs = zext i16 %b to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %m = mul i32 %as, %bs
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ms = zext i32 %m to i64
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: ret i64 %ms
 ;
 ; CHECK-NO-DSP-LABEL: 'test'
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = zext i16 %a to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %as = zext i16 %a to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %bs = zext i16 %b to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %ms = zext i32 %m to i64
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: ret i64 %ms
 ;
     %as = zext i16 %a to i32
     %bs = zext i16 %b to i32
@@ -25,20 +26,20 @@ define i64 @test(i16 %a, i16 %b) {
 
 define i64 @withadd(i16 %a, i16 %b, i64 %c) {
 ; CHECK-LABEL: 'withadd'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = zext i16 %a to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %as = zext i16 %a to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bs = zext i16 %b to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %m = mul i32 %as, %bs
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ms = zext i32 %m to i64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = add i64 %c, %ms
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: ret i64 %r
 ;
 ; CHECK-NO-DSP-LABEL: 'withadd'
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = zext i16 %a to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %as = zext i16 %a to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %bs = zext i16 %b to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %ms = zext i32 %m to i64
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 2 for: %r = add i64 %c, %ms
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: ret i64 %r
 ;
     %as = zext i16 %a to i32
     %bs = zext i16 %b to i32
@@ -50,24 +51,24 @@ define i64 @withadd(i16 %a, i16 %b, i64 %c) {
 
 define i64 @withloads(ptr %pa, ptr %pb, i64 %c) {
 ; CHECK-LABEL: 'withloads'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %as = zext i16 %a to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bs = zext i16 %b to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %a = load i16, ptr %pa, align 2
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %b = load i16, ptr %pb, align 2
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %as = zext i16 %a to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %bs = zext i16 %b to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %m = mul i32 %as, %bs
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %ms = zext i32 %m to i64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = add i64 %c, %ms
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: ret i64 %r
 ;
 ; CHECK-NO-DSP-LABEL: 'withloads'
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %as = zext i16 %a to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bs = zext i16 %b to i32
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
-; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %a = load i16, ptr %pa, align 2
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %b = load i16, ptr %pb, align 2
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 0 for: %as = zext i16 %a to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 0 for: %bs = zext i16 %b to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: %ms = zext i32 %m to i64
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 2 for: %r = add i64 %c, %ms
+; CHECK-NO-DSP-NEXT:  Cost Model: Found costs of 1 for: ret i64 %r
 ;
     %a = load i16, ptr %pa
     %b = load i16, ptr %pb
diff --git a/llvm/test/Analysis/CostModel/ARM/select.ll b/llvm/test/Analysis/CostModel/ARM/select.ll
index f626901..de429fd2 100644
--- a/llvm/test/Analysis/CostModel/ARM/select.ll
+++ b/llvm/test/Analysis/CostModel/ARM/select.ll
@@ -1,272 +1,140 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE-RECIP
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON-RECIP
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1-RECIP
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2-RECIP
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE-SIZE
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON-SIZE
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1-SIZE
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2-SIZE
+; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE
+; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1
+; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @selects() {
   ; Scalar values
-; CHECK-MVE-RECIP-LABEL: 'selects'
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = select i1 undef, i1 undef, i1 undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-LABEL: 'selects'
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %v0 = select i1 undef, i1 undef, i1 undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v1 = select i1 undef, i8 undef, i8 undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v2 = select i1 undef, i16 undef, i16 undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v3 = select i1 undef, i32 undef, i32 undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v4 = select i1 undef, i64 undef, i64 undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v5 = select i1 undef, float undef, float undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v6 = select i1 undef, double undef, double undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-; CHECK-NEON-RECIP-LABEL: 'selects'
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = select i1 undef, i1 undef, i1 undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-LABEL: 'selects'
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %v0 = select i1 undef, i1 undef, i1 undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v1 = select i1 undef, i8 undef, i8 undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v2 = select i1 undef, i16 undef, i16 undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v3 = select i1 undef, i32 undef, i32 undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v4 = select i1 undef, i64 undef, i64 undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v5 = select i1 undef, float undef, float undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v6 = select i1 undef, double undef, double undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 19 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 50 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 100 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-; CHECK-THUMB1-RECIP-LABEL: 'selects'
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = select i1 undef, i1 undef, i1 undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v6 = select i1 undef, double undef, double undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-THUMB1-LABEL: 'selects'
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %v0 = select i1 undef, i1 undef, i1 undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v1 = select i1 undef, i8 undef, i8 undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v2 = select i1 undef, i16 undef, i16 undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v3 = select i1 undef, i32 undef, i32 undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v4 = select i1 undef, i64 undef, i64 undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v5 = select i1 undef, float undef, float undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v6 = select i1 undef, double undef, double undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:24 Lat:16 SizeLat:16 for: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:48 Lat:32 SizeLat:32 for: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:24 Lat:16 SizeLat:16 for: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:48 Lat:32 SizeLat:32 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:24 Lat:16 SizeLat:16 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:48 Lat:32 SizeLat:32 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:9 Lat:6 SizeLat:6 for: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:25 Lat:20 SizeLat:20 for: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-THUMB1-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
-; CHECK-THUMB2-RECIP-LABEL: 'selects'
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = select i1 undef, i1 undef, i1 undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v6 = select i1 undef, double undef, double undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-MVE-SIZE-LABEL: 'selects'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v0 = select i1 undef, i1 undef, i1 undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = select i1 undef, i16 undef, i16 undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3 = select i1 undef, i32 undef, i32 undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v5 = select i1 undef, float undef, float undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v6 = select i1 undef, double undef, double undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-NEON-SIZE-LABEL: 'selects'
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v0 = select i1 undef, i1 undef, i1 undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = select i1 undef, i16 undef, i16 undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3 = select i1 undef, i32 undef, i32 undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v5 = select i1 undef, float undef, float undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v6 = select i1 undef, double undef, double undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-THUMB1-SIZE-LABEL: 'selects'
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v0 = select i1 undef, i1 undef, i1 undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = select i1 undef, i16 undef, i16 undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3 = select i1 undef, i32 undef, i32 undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v5 = select i1 undef, float undef, float undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v6 = select i1 undef, double undef, double undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-THUMB2-SIZE-LABEL: 'selects'
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v0 = select i1 undef, i1 undef, i1 undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = select i1 undef, i16 undef, i16 undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3 = select i1 undef, i32 undef, i32 undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v5 = select i1 undef, float undef, float undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v6 = select i1 undef, double undef, double undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-THUMB2-LABEL: 'selects'
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %v0 = select i1 undef, i1 undef, i1 undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v1 = select i1 undef, i8 undef, i8 undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v2 = select i1 undef, i16 undef, i16 undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v3 = select i1 undef, i32 undef, i32 undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v4 = select i1 undef, i64 undef, i64 undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v5 = select i1 undef, float undef, float undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v6 = select i1 undef, double undef, double undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:24 Lat:16 SizeLat:16 for: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:48 Lat:32 SizeLat:32 for: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:24 Lat:16 SizeLat:16 for: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:48 Lat:32 SizeLat:32 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:24 Lat:16 SizeLat:16 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:48 Lat:32 SizeLat:32 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:9 Lat:6 SizeLat:6 for: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:25 Lat:20 SizeLat:20 for: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-THUMB2-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
   %v0 = select i1 undef, i1 undef, i1 undef
   %v1 = select i1 undef, i8 undef, i8 undef
diff --git a/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll
index 465611d..5ef869e 100644
--- a/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll
+++ b/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+; RUN: opt < %s  -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+
 ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
 ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM
 
@@ -17,11 +18,11 @@ target triple = "armv7--linux-gnueabihf"
 
 define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'direct'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %v0, %v1
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = shl <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T432, ptr %loadaddr
 ; ASM: vld1.64
@@ -36,13 +37,13 @@ define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'ups1632'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32>
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32>
-; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %r1, %r2
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r1 = sext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = shl <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T416, ptr %loadaddr
 ; ASM: vldr
@@ -59,13 +60,13 @@ define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'upu1632'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32>
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32>
-; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %r1, %r2
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r2 = zext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = shl <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T416, ptr %loadaddr
 ; ASM: vldr
@@ -82,12 +83,12 @@ define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'ups3264'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <2 x i32> %v0, %v1
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64>
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = shl <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: %st = sext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T232, ptr %loadaddr
 ; ASM: vldr
@@ -104,12 +105,12 @@ define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'upu3264'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <2 x i32> %v0, %v1
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64>
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = shl <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: %st = zext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T232, ptr %loadaddr
 ; ASM: vldr
@@ -126,12 +127,12 @@ define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @dn3216(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'dn3216'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %v0, %v1
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16>
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = shl <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: %st = trunc <4 x i32> %r3 to <4 x i16>
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i16> %st, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T432, ptr %loadaddr
 ; ASM: vld1.64
diff --git a/llvm/test/Analysis/CostModel/ARM/shuffle.ll b/llvm/test/Analysis/CostModel/ARM/shuffle.ll
index a17bbab..0fd1456 100644
--- a/llvm/test/Analysis/CostModel/ARM/shuffle.ll
+++ b/llvm/test/Analysis/CostModel/ARM/shuffle.ll
@@ -1,59 +1,59 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: opt < %s  -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE
+; RUN: opt < %s  -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @broadcast() {
 ; CHECK-MVE-LABEL: 'broadcast'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:40 Lat:80 SizeLat:80 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:5 Lat:10 SizeLat:10 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-NEON-LABEL: 'broadcast'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 8 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 14 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 26 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 50 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
   %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer
@@ -90,58 +90,58 @@ define void @broadcast() {
 ;; Reverse shuffles should be lowered to vrev and possibly a vext (for quadwords, on neon)
 define void @reverse() {
 ; CHECK-MVE-LABEL: 'reverse'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3i24 = shufflevector <3 x i24> undef, <3 x i24> undef, <3 x i32> <i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v3i124 = shufflevector <3 x i124> undef, <3 x i124> undef, <3 x i32> <i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %v3i24 = shufflevector <3 x i24> undef, <3 x i24> undef, <3 x i32> <i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v3i124 = shufflevector <3 x i124> undef, <3 x i124> undef, <3 x i32> <i32 2, i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-NEON-LABEL: 'reverse'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i24 = shufflevector <3 x i24> undef, <3 x i24> undef, <3 x i32> <i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v3i124 = shufflevector <3 x i124> undef, <3 x i124> undef, <3 x i32> <i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 20 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v3i24 = shufflevector <3 x i24> undef, <3 x i24> undef, <3 x i32> <i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 18 for: %v3i124 = shufflevector <3 x i124> undef, <3 x i124> undef, <3 x i32> <i32 2, i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
   %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -179,40 +179,40 @@ define void @reverse() {
 
 define void @concat() {
 ; CHECK-MVE-LABEL: 'concat'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-NEON-LABEL: 'concat'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 12 for: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 24 for: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 48 for: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 12 for: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 24 for: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 48 for: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 12 for: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 24 for: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 12 for: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 20 for: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 20 for: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -241,54 +241,54 @@ define void @concat() {
 
 define void @select() {
 ; CHECK-MVE-LABEL: 'select'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-NEON-LABEL: 'select'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 6 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 48 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 32 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 16 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 32 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 20 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3>
   %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
@@ -324,40 +324,40 @@ define void @select() {
 
 define void @vrev2() {
 ; CHECK-MVE-LABEL: 'vrev2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-NEON-LABEL: 'vrev2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 24 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 48 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 96 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 24 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 48 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 96 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 24 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 48 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 24 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 20 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 20 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 8 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
   %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
@@ -386,26 +386,26 @@ define void @vrev2() {
 
 define void @vrev4() {
 ; CHECK-MVE-LABEL: 'vrev4'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-NEON-LABEL: 'vrev4'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 48 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 96 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 48 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 96 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 48 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
diff --git a/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll
index df26121..924a629 100644
--- a/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll
+++ b/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+; RUN: opt < %s  -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+
 ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
 ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM
 
@@ -17,11 +18,11 @@ target triple = "armv7--linux-gnueabihf"
 
 define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'direct'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %v0, %v1
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = sub <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T432, ptr %loadaddr
 ; ASM: vld1.64
@@ -36,13 +37,13 @@ define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'ups1632'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32>
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32>
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %r1, %r2
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r1 = sext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = sub <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T416, ptr %loadaddr
 ; ASM: vldr
@@ -59,13 +60,13 @@ define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'upu1632'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32>
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32>
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %r1, %r2
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of 0 for: %r2 = zext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = sub <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T416, ptr %loadaddr
 ; ASM: vldr
@@ -82,12 +83,12 @@ define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'ups3264'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <2 x i32> %v0, %v1
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64>
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = sub <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: %st = sext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T232, ptr %loadaddr
 ; ASM: vldr
@@ -104,12 +105,12 @@ define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'upu3264'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <2 x i32> %v0, %v1
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64>
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = sub <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: %st = zext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T232, ptr %loadaddr
 ; ASM: vldr
@@ -126,12 +127,12 @@ define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 
 define void @dn3216(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
 ; COST-LABEL: 'dn3216'
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %v0, %v1
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16>
-; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, ptr %storeaddr, align 8
-; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found costs of 1 for: %r3 = sub <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found costs of 1 for: %st = trunc <4 x i32> %r3 to <4 x i16>
+; COST-NEXT:  Cost Model: Found costs of 1 for: store <4 x i16> %st, ptr %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v0 = load %T432, ptr %loadaddr
 ; ASM: vld1.64
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index 0f75887..c68ae92 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -1734,8 +1734,7 @@ define float @test_different_call_conv_target(float %x) {
 define <2 x i32> @test_shufflevector_s32_v2s32(i32 %arg) {
 ; CHECK-LABEL: name: test_shufflevector_s32_v2s32
 ; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], shufflemask(0, 0)
+; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ARG]](s32), [[ARG]](s32)
 ; CHECK: $d0 = COPY [[VEC]](<2 x s32>)
   %vec = insertelement <1 x i32> undef, i32 %arg, i32 0
   %res = shufflevector <1 x i32> %vec, <1 x i32> undef, <2 x i32> zeroinitializer
@@ -1745,7 +1744,8 @@ define <2 x i32> @test_shufflevector_s32_v2s32(i32 %arg) {
 define i32 @test_shufflevector_v2s32_s32(<2 x i32> %arg) {
 ; CHECK-LABEL: name: test_shufflevector_v2s32_s32
 ; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], shufflemask(1)
+; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ARG]](<2 x s32>), [[IDX]](s64)
 ; CHECK: $w0 = COPY [[RES]](s32)
   %vec = shufflevector <2 x i32> %arg, <2 x i32> undef, <1 x i32> <i32 1>
   %res = extractelement <1 x i32> %vec, i32 0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index 2e70252..fdd0ebb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -119,33 +119,6 @@ body:             |
 
 ...
 ---
-name:            shuffle_1elt_mask
-alignment:       4
-tracksRegLiveness: true
-body:             |
-  bb.1:
-    liveins: $d0, $d1
-
-    ; CHECK-LABEL: name: shuffle_1elt_mask
-    ; CHECK: liveins: $d0, $d1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
-    ; CHECK-NEXT: $d0 = COPY [[COPY2]](s64)
-    ; CHECK-NEXT: $d1 = COPY [[COPY3]](s64)
-    ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1
-    %0:_(s64) = COPY $d0
-    %1:_(s64) = COPY $d1
-    %3:_(s64) = G_SHUFFLE_VECTOR %0:_(s64), %1:_, shufflemask(0)
-    %4:_(s64) = G_SHUFFLE_VECTOR %0:_(s64), %1:_, shufflemask(1)
-    $d0 = COPY %3(s64)
-    $d1 = COPY %4(s64)
-    RET_ReallyLR implicit $d0, implicit $d1
-
-...
----
 name:            oversize_shuffle_v4i64
 alignment:       4
 tracksRegLiveness: true
@@ -641,7 +614,8 @@ body:             |
     %0:_(<8 x s1>) = G_TRUNC %2:_(<8 x s8>)
     %3:_(<8 x s8>) = COPY $d1
     %1:_(<8 x s1>) = G_TRUNC %3:_(<8 x s8>)
-    %4:_(s1) = G_SHUFFLE_VECTOR %0:_(<8 x s1>), %1:_, shufflemask(12)
+    %7:_(s64) = G_CONSTANT i64 4
+    %4:_(s1) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s1>), %7(s64)
     %5:_(s8) = G_ZEXT %4:_(s1)
     %6:_(s32) = G_ANYEXT %5:_(s8)
     $w0 = COPY %6:_(s32)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir
index 9261d7a..914dfa0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir
@@ -80,22 +80,3 @@ body:             |
     %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(-1,0,1,-1)
     RET_ReallyLR implicit %2
 ...
-
----
-name: shuffle_vector_scalar
-tracksRegLiveness: true
-body:             |
-  bb.1:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: shuffle_vector_scalar
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY]](s64), [[COPY]](s64), [[COPY]](s64)
-    ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
-    %0:_(s64) = COPY $x0
-    %1:_(s64) = COPY $x1
-    %2:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0, 0, 0)
-    RET_ReallyLR implicit %2
-...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-undef-rhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-undef-rhs.mir
index 9bf7993..7a314ba 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-undef-rhs.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-undef-rhs.mir
@@ -20,23 +20,3 @@ body:             |
     %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(0, 2, 1, 3)
     RET_ReallyLR implicit %2
 ...
-
----
-name: shuffle_vector_undef_rhs_scalar
-tracksRegLiveness: true
-body:             |
-  bb.1:
-    liveins: $x0
-
-    ; CHECK-LABEL: name: shuffle_vector_undef_rhs_scalar
-    ; CHECK: liveins: $x0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[DEF]](s64)
-    ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<2 x s64>)
-    %0:_(s64) = COPY $x0
-    %1:_(s64) = G_IMPLICIT_DEF
-    %2:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(s64), %1(s64), shufflemask(0, 1)
-    RET_ReallyLR implicit %2
-...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir
index 2c9ae5b..9013410 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir
@@ -386,7 +386,7 @@ body:             |
     ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
-    %6:_(<4 x p0>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,1,0,1)
+    %6:_(<4 x p0>) = G_BUILD_VECTOR %0, %1, %0, %1
     RET_ReallyLR implicit %6
 ...
 
@@ -408,104 +408,6 @@ body:             |
     ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<2 x p0>)
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
-    %6:_(<2 x p0>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1,0)
-    RET_ReallyLR implicit %6
-...
-
-# Check that we properly use undef values when shuffle_vector
-# on scalars gets lowered to build_vector.
----
-name: shuffle_vector_on_scalars_to_build_vector_with_undef
-tracksRegLiveness: true
-body:             |
-  bb.1:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: shuffle_vector_on_scalars_to_build_vector_with_undef
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[COPY1]](s64)
-    ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
-    %0:_(s64) = COPY $x0
-    %1:_(s64) = COPY $x1
-    %6:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,-1,-1,1)
-    RET_ReallyLR implicit %6
-...
-
-# Check that shuffle_vector on scalars gets combined into a plain
-# copy when the resulting type is a scalar as well and the sizes
-# are compatible.
----
-name: shuffle_vector_on_scalars_to_copy_ptr
-tracksRegLiveness: true
-body:             |
-  bb.1:
-    liveins: $x0
-
-    ; CHECK-LABEL: name: shuffle_vector_on_scalars_to_copy_ptr
-    ; CHECK: liveins: $x0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK-NEXT: RET_ReallyLR implicit [[COPY]](p0)
-    %0:_(p0) = COPY $x0
-    %6:_(p0) = G_SHUFFLE_VECTOR %0, %0, shufflemask(0)
-    RET_ReallyLR implicit %6
-...
----
-name: shuffle_vector_to_copy_lhs
-tracksRegLiveness: true
-body:             |
-  bb.1:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: shuffle_vector_to_copy_lhs
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
-    ; CHECK-NEXT: RET_ReallyLR implicit [[EVEC]](s32)
-    %0:_(<2 x s32>) = COPY $x0
-    %1:_(<2 x s32>) = COPY $x1
-    %6:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1)
-    RET_ReallyLR implicit %6
-...
----
-name: shuffle_vector_to_copy_rhs
-tracksRegLiveness: true
-body:             |
-  bb.1:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: shuffle_vector_to_copy_rhs
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x1
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
-    ; CHECK-NEXT: RET_ReallyLR implicit [[EVEC]](s32)
-    %0:_(<2 x s32>) = COPY $x0
-    %1:_(<2 x s32>) = COPY $x1
-    %6:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2)
-    RET_ReallyLR implicit %6
-...
----
-name: shuffle_vector_to_copy_undef
-tracksRegLiveness: true
-body:             |
-  bb.1:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: shuffle_vector_to_copy_undef
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: RET_ReallyLR implicit [[DEF]](s32)
-    %0:_(<2 x s32>) = COPY $x0
-    %1:_(<2 x s32>) = COPY $x1
-    %6:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(-1)
+    %6:_(<2 x p0>) = G_BUILD_VECTOR %1, %0
     RET_ReallyLR implicit %6
 ...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
index b252884..46dbc15 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
@@ -96,7 +96,7 @@ body:             |
     ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32)
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr(s32) = COPY [[COPY2]](s32)
     ; CHECK-NEXT: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY2]](s32), [[COPY3]], [[SITOFP]]
-    ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:gpr(s32) = G_FPTOSI [[SELECT]](s32)
+    ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:fpr(s32) = G_FPTOSI [[SELECT]](s32)
     %0:_(s32) = COPY $w0
     %2:_(s32) = COPY $w1
     %3:_(s32) = COPY $w2
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
new file mode 100644
index 0000000..a729772
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
@@ -0,0 +1,1943 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-NOFPRCVT
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1  | FileCheck %s --check-prefixes=CHECK
+
+; CHECK-GI: warning: Instruction selection used fallback path for fptosi_i32_f16_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f16_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f32_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f64_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f64_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f32_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f16_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f16_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f32_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32_simd
+
+;
+; FPTOI
+;
+
+define float @test_fptosi_f16_i32_simd(half %a)  {
+; CHECK-NOFPRCVT-LABEL: test_fptosi_f16_i32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: test_fptosi_f16_i32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %r = fptosi half %a to i32
+  %bc = bitcast i32 %r to float
+  ret float %bc
+}
+
+define double @test_fptosi_f16_i64_simd(half %a)  {
+; CHECK-NOFPRCVT-LABEL: test_fptosi_f16_i64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: test_fptosi_f16_i64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %r = fptosi half %a to i64
+  %bc = bitcast i64 %r to double
+  ret double %bc
+}
+
+define float @test_fptosi_f64_i32_simd(double %a)  {
+; CHECK-NOFPRCVT-LABEL: test_fptosi_f64_i32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: test_fptosi_f64_i32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %r = fptosi double %a to i32
+  %bc = bitcast i32 %r to float
+  ret float %bc
+}
+
+define double @test_fptosi_f32_i64_simd(float %a)  {
+; CHECK-NOFPRCVT-LABEL: test_fptosi_f32_i64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: test_fptosi_f32_i64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %r = fptosi float %a to i64
+  %bc = bitcast i64 %r to double
+  ret double %bc
+}
+
+define double @test_fptosi_f64_i64_simd(double %a)  {
+; CHECK-NOFPRCVT-LABEL: test_fptosi_f64_i64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: test_fptosi_f64_i64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %r = fptosi double %a to i64
+  %bc = bitcast i64 %r to double
+  ret double %bc
+}
+
+
+define float @test_fptosi_f32_i32_simd(float %a)  {
+; CHECK-NOFPRCVT-LABEL: test_fptosi_f32_i32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: test_fptosi_f32_i32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %r = fptosi float %a to i32
+  %bc = bitcast i32 %r to float
+  ret float %bc
+}
+
+define float @test_fptoui_f16_i32_simd(half %a)  {
+; CHECK-NOFPRCVT-LABEL: test_fptoui_f16_i32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: test_fptoui_f16_i32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
+  %r = fptoui half %a to i32
+  %bc = bitcast i32 %r to float
+  ret float %bc
+}
+
+define double @test_fptoui_f16_i64_simd(half %a)  {
+; CHECK-NOFPRCVT-LABEL: test_fptoui_f16_i64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: test_fptoui_f16_i64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %r = fptoui half %a to i64
+  %bc = bitcast i64 %r to double
+  ret double %bc
+}
+
+define float @test_fptoui_f64_i32_simd(double %a)  {
+; CHECK-NOFPRCVT-LABEL: test_fptoui_f64_i32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: test_fptoui_f64_i32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %r = fptoui double %a to i32
+  %bc = bitcast i32 %r to float
+  ret float %bc
+}
+
+define double @test_fptoui_f32_i64_simd(float %a)  {
+; CHECK-NOFPRCVT-LABEL: test_fptoui_f32_i64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: test_fptoui_f32_i64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %r = fptoui float %a to i64
+  %bc = bitcast i64 %r to double
+  ret double %bc
+}
+
+define double @test_fptoui_f64_i64_simd(double %a)  {
+; CHECK-NOFPRCVT-LABEL: test_fptoui_f64_i64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: test_fptoui_f64_i64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %r = fptoui double %a to i64
+  %bc = bitcast i64 %r to double
+  ret double %bc
+}
+
+
+define float @test_fptoui_f32_i32_simd(float %a)  {
+; CHECK-NOFPRCVT-LABEL: test_fptoui_f32_i32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: test_fptoui_f32_i32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ret
+  %r = fptoui float %a to i32
+  %bc = bitcast i32 %r to float
+  ret float %bc
+}
+
+
+;
+; FPTOI strictfp
+;
+
+define float @fptosi_i32_f16_simd(half %x)  {
+; CHECK-NOFPRCVT-LABEL: fptosi_i32_f16_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptosi_i32_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i32 %val to float
+  ret float %sum
+}
+
+define double @fptosi_i64_f16_simd(half %x)  {
+; CHECK-NOFPRCVT-LABEL: fptosi_i64_f16_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptosi_i64_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i64 %val to double
+  ret double %sum
+}
+
+define double @fptosi_i64_f32_simd(float %x)  {
+; CHECK-NOFPRCVT-LABEL: fptosi_i64_f32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptosi_i64_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
+  %bc = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @fptosi_i32_f64_simd(double %x)  {
+; CHECK-NOFPRCVT-LABEL: fptosi_i32_f64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptosi_i32_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict")
+  %bc = bitcast i32 %val to float
+  ret float %bc
+}
+
+define double @fptosi_i64_f64_simd(double %x)  {
+; CHECK-NOFPRCVT-LABEL: fptosi_i64_f64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptosi_i64_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict")
+  %bc = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @fptosi_i32_f32_simd(float %x)  {
+; CHECK-NOFPRCVT-LABEL: fptosi_i32_f32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptosi_i32_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict")
+  %bc = bitcast i32 %val to float
+  ret float %bc
+}
+
+
+
+define float @fptoui_i32_f16_simd(half %x)  {
+; CHECK-NOFPRCVT-LABEL: fptoui_i32_f16_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptoui_i32_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i32 %val to float
+  ret float %sum
+}
+
+define double @fptoui_i64_f16_simd(half %x)  {
+; CHECK-NOFPRCVT-LABEL: fptoui_i64_f16_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptoui_i64_f16_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
+  %sum = bitcast i64 %val to double
+  ret double %sum
+}
+
+define double @fptoui_i64_f32_simd(float %x)  {
+; CHECK-NOFPRCVT-LABEL: fptoui_i64_f32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptoui_i64_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
+  %bc = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @fptoui_i32_f64_simd(double %x)  {
+; CHECK-NOFPRCVT-LABEL: fptoui_i32_f64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptoui_i32_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict")
+  %bc = bitcast i32 %val to float
+  ret float %bc
+}
+
+define double @fptoui_i64_f64_simd(double %x)  {
+; CHECK-NOFPRCVT-LABEL: fptoui_i64_f64_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptoui_i64_f64_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict")
+  %bc = bitcast i64 %val to double
+  ret double %bc
+}
+
+define float @fptoui_i32_f32_simd(float %x)  {
+; CHECK-NOFPRCVT-LABEL: fptoui_i32_f32_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fptoui_i32_f32_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ret
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict")
+  %bc = bitcast i32 %val to float
+  ret float %bc
+}
+
+;
+; FPTOI rounding
+;
+
+
+define double @fcvtas_ds_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtas_ds_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtas_ds_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.round.f32(float %a)
+  %i = fptosi float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtas_sd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtas_sd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtas_sd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.round.f64(double %a)
+  %i = fptosi double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtas_ss_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtas_ss_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtas_ss_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.round.f32(float %a)
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtas_dd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtas_dd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtas_dd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.round.f64(double %a)
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+define double @fcvtau_ds_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtau_ds_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtau x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtau_ds_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.round.f32(float %a)
+  %i = fptoui float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtau_sd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtau_sd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtau w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtau_sd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.round.f64(double %a)
+  %i = fptoui double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtau_ss_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtau_ss_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtau_ss_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.round.f32(float %a)
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtau_dd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtau_dd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtau_dd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.round.f64(double %a)
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+define double @fcvtms_ds_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtms_ds_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtms_ds_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.floor.f32(float %a)
+  %i = fptosi float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtms_sd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtms_sd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtms_sd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.floor.f64(double %a)
+  %i = fptosi double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtms_ss_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtms_ss_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtms_ss_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.floor.f32(float %a)
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtms_dd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtms_dd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtms_dd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.floor.f64(double %a)
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+
+define double @fcvtmu_ds_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtmu_ds_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtmu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtmu_ds_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.floor.f32(float %a)
+  %i = fptoui float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtmu_sd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtmu_sd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtmu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtmu_sd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.floor.f64(double %a)
+  %i = fptoui double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtmu_ss_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtmu_ss_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtmu_ss_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.floor.f32(float %a)
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtmu_dd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtmu_dd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtmu_dd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.floor.f64(double %a)
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+define double @fcvtps_ds_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtps_ds_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtps_ds_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.ceil.f32(float %a)
+  %i = fptosi float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtps_sd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtps_sd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtps_sd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.ceil.f64(double %a)
+  %i = fptosi double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtps_ss_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtps_ss_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtps_ss_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.ceil.f32(float %a)
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtps_dd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtps_dd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtps_dd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.ceil.f64(double %a)
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+define double @fcvtpu_ds_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtpu_ds_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtpu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtpu_ds_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.ceil.f32(float %a)
+  %i = fptoui float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtpu_sd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtpu_sd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtpu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtpu_sd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.ceil.f64(double %a)
+  %i = fptoui double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtpu_ss_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtpu_ss_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtpu_ss_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.ceil.f32(float %a)
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtpu_dd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtpu_dd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtpu_dd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.ceil.f64(double %a)
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+define double @fcvtzs_ds_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_ds_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_ds_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.trunc.f32(float %a)
+  %i = fptosi float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzs_sd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_sd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_sd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.trunc.f64(double %a)
+  %i = fptosi double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtzs_ss_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_ss_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_ss_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.trunc.f32(float %a)
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzs_dd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_dd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_dd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.trunc.f64(double %a)
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtzu_ds_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_ds_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_ds_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.trunc.f32(float %a)
+  %i = fptoui float %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzu_sd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_sd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_sd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.trunc.f64(double %a)
+  %i = fptoui double %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtzu_ss_round_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_ss_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_ss_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.trunc.f32(float %a)
+  %i = fptosi float %r to i32
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzu_dd_round_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_dd_round_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_dd_round_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.trunc.f64(double %a)
+  %i = fptosi double %r to i64
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+
+;
+; FPTOI saturating
+;
+
+define float @fcvtzs_sh_sat_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_sh_sat_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_sh_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzs_dh_sat_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_dh_sat_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_dh_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtzs_ds_sat_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_ds_sat_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_ds_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzs_sd_sat_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_sd_sat_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_sd_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtzs_ss_sat_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_ss_sat_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_ss_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzs_dd_sat_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_dd_sat_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_dd_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzu_sh_sat_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_sh_sat_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_sh_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzu_dh_sat_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_dh_sat_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_dh_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtzu_ds_sat_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_ds_sat_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_ds_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzu_sd_sat_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_sd_sat_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_sd_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtzu_ss_sat_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_ss_sat_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_ss_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzu_dd_sat_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_dd_sat_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_dd_sat_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+;
+; FPTOI saturating with rounding
+;
+
+define float @fcvtas_sh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtas_sh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtas_sh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.round.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtas_dh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtas_dh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtas_dh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.round.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtas_ds_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtas_ds_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtas_ds_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.round.f32(float %a)
+  %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtas_sd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtas_sd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtas_sd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.round.f64(double %a)
+  %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtas_ss_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtas_ss_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtas_ss_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.round.f32(float %a)
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtas_dd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtas_dd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtas_dd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.round.f64(double %a)
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtau_sh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtau_sh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtau w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtau_sh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau s0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.round.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtau_dh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtau_dh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtau x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtau_dh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau d0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.round.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtau_ds_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtau_ds_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtau x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtau_ds_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.round.f32(float %a)
+  %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtau_sd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtau_sd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtau w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtau_sd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtau s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.round.f64(double %a)
+  %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtau_ss_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtau_ss_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtau_ss_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.round.f32(float %a)
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtau_dd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtau_dd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtas d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtau_dd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.round.f64(double %a)
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtms_sh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtms_sh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtms_sh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.floor.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtms_dh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtms_dh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtms_dh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.floor.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtms_ds_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtms_ds_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtms_ds_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.floor.f32(float %a)
+  %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtms_sd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtms_sd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtms_sd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.floor.f64(double %a)
+  %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtms_ss_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtms_ss_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtms_ss_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.floor.f32(float %a)
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtms_dd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtms_dd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtms_dd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.floor.f64(double %a)
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtmu_sh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtmu_sh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtmu w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtmu_sh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu s0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.floor.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtmu_dh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtmu_dh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtmu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtmu_dh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu d0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.floor.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtmu_ds_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtmu_ds_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtmu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtmu_ds_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.floor.f32(float %a)
+  %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtmu_sd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtmu_sd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtmu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtmu_sd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtmu s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.floor.f64(double %a)
+  %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtmu_ss_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtmu_ss_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtmu_ss_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.floor.f32(float %a)
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtmu_dd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtmu_dd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtms d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtmu_dd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtms d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.floor.f64(double %a)
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtps_sh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtps_sh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtps_sh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtps_dh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtps_dh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtps_dh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtps_ds_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtps_ds_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtps_ds_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.ceil.f32(float %a)
+  %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtps_sd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtps_sd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtps_sd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.ceil.f64(double %a)
+  %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtps_ss_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtps_ss_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtps_ss_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.ceil.f32(float %a)
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtps_dd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtps_dd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtps_dd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.ceil.f64(double %a)
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtpu_sh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtpu_sh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtpu w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtpu_sh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu s0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtpu_dh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtpu_dh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtpu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtpu_dh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu d0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtpu_ds_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtpu_ds_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtpu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtpu_ds_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.ceil.f32(float %a)
+  %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtpu_sd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtpu_sd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtpu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtpu_sd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtpu s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.ceil.f64(double %a)
+  %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtpu_ss_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtpu_ss_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtpu_ss_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.ceil.f32(float %a)
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtpu_dd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtpu_dd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtps d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtpu_dd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtps d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.ceil.f64(double %a)
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzs_sh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_sh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_sh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzs_dh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_dh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_dh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtzs_ds_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_ds_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_ds_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.trunc.f32(float %a)
+  %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzs_sd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_sd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_sd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.trunc.f64(double %a)
+  %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtzs_ss_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_ss_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_ss_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.trunc.f32(float %a)
+  %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzs_dd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_dd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_dd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.trunc.f64(double %a)
+  %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzu_sh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_sh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_sh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+  %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzu_dh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_dh_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_dh_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+  %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define double @fcvtzu_ds_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_ds_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_ds_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.trunc.f32(float %a)
+  %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
+
+define float @fcvtzu_sd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_sd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu w8, d0
+; CHECK-NOFPRCVT-NEXT:    fmov s0, w8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_sd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.trunc.f64(double %a)
+  %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define float @fcvtzu_ss_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_ss_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu s0, s0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_ss_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ret
+  %r = call float @llvm.trunc.f32(float %a)
+  %i = call i32 @llvm.fptoui.sat.i32.f32(float %r)
+  %bc = bitcast i32 %i to float
+  ret float %bc
+}
+
+define double @fcvtzu_dd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_dd_simd:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_dd_simd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %r = call double @llvm.trunc.f64(double %a)
+  %i = call i64 @llvm.fptoui.sat.i64.f64(double %r)
+  %bc = bitcast i64 %i to double
+  ret double %bc
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index e18a5f6..d8f3708 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -980,12 +980,18 @@ define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
 }
 
 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
-; CHECK-LABEL: test_bitcastv8i8tov1f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg v0.8b, v0.8b
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_bitcastv8i8tov1f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    neg v0.8b, v0.8b
+; CHECK-SD-NEXT:    fcvtzs x8, d0
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_bitcastv8i8tov1f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    neg v0.8b, v0.8b
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ret
   %sub.i = sub <8 x i8> zeroinitializer, %a
   %1 = bitcast <8 x i8> %sub.i to <1 x double>
   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -993,12 +999,18 @@ define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
 }
 
 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
-; CHECK-LABEL: test_bitcastv4i16tov1f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg v0.4h, v0.4h
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_bitcastv4i16tov1f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    neg v0.4h, v0.4h
+; CHECK-SD-NEXT:    fcvtzs x8, d0
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_bitcastv4i16tov1f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    neg v0.4h, v0.4h
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ret
   %sub.i = sub <4 x i16> zeroinitializer, %a
   %1 = bitcast <4 x i16> %sub.i to <1 x double>
   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -1006,12 +1018,18 @@ define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
 }
 
 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
-; CHECK-LABEL: test_bitcastv2i32tov1f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg v0.2s, v0.2s
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_bitcastv2i32tov1f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    neg v0.2s, v0.2s
+; CHECK-SD-NEXT:    fcvtzs x8, d0
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_bitcastv2i32tov1f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    neg v0.2s, v0.2s
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ret
   %sub.i = sub <2 x i32> zeroinitializer, %a
   %1 = bitcast <2 x i32> %sub.i to <1 x double>
   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
@@ -1031,8 +1049,7 @@ define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
 ; CHECK-GI-NEXT:    fmov x8, d0
 ; CHECK-GI-NEXT:    neg x8, x8
 ; CHECK-GI-NEXT:    fmov d0, x8
-; CHECK-GI-NEXT:    fcvtzs x8, d0
-; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    fcvtzs d0, d0
 ; CHECK-GI-NEXT:    ret
   %sub.i = sub <1 x i64> zeroinitializer, %a
   %1 = bitcast <1 x i64> %sub.i to <1 x double>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index 627d31f..1e0cfa0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -359,11 +359,16 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
 
 ; FIXME: Generate "fcvtzs d0, d0"?
 define <1 x i64> @fcvtzs_1d(<1 x double> %A) nounwind {
-; CHECK-LABEL: fcvtzs_1d:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_1d:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs x8, d0
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_1d:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzs d0, d0
+; CHECK-GI-NEXT:    ret
 	%tmp3 = fptosi <1 x double> %A to <1 x i64>
 	ret <1 x i64> %tmp3
 }
@@ -438,11 +443,16 @@ define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind {
 
 ; FIXME: Generate "fcvtzu d0, d0"?
 define <1 x i64> @fcvtzu_1d(<1 x double> %A) nounwind {
-; CHECK-LABEL: fcvtzu_1d:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu x8, d0
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_1d:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu x8, d0
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_1d:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcvtzu d0, d0
+; CHECK-GI-NEXT:    ret
 	%tmp3 = fptoui <1 x double> %A to <1 x i64>
 	ret <1 x i64> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index c741129..b963acd 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -31,8 +31,7 @@ define <1 x i32> @test_signed_v1f32_v1i32(<1 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v1f32_v1i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcvtzs w8, s0
-; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    fcvtzs s0, s0
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f32.v1i32(<1 x float> %f)
     ret <1 x i32> %x
@@ -1162,18 +1161,24 @@ declare <7 x i32> @llvm.fptosi.sat.v7f16.v7i32 (<7 x half>)
 declare <8 x i32> @llvm.fptosi.sat.v8f16.v8i32 (<8 x half>)
 
 define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v1f16_v1i32:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzs w8, s0
-; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_signed_v1f16_v1i32:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-SD-CVT-NEXT:    fmov s0, w8
+; CHECK-SD-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_signed_v1f16_v1i32:
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fcvtzs w8, h0
 ; CHECK-FP16-NEXT:    fmov s0, w8
 ; CHECK-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v1f16_v1i32:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzs s0, s0
+; CHECK-GI-CVT-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
 }
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index efe0a1b..5a66b68 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -31,8 +31,7 @@ define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v1f32_v1i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcvtzu w8, s0
-; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    fcvtzu s0, s0
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f32.v1i32(<1 x float> %f)
     ret <1 x i32> %x
@@ -993,18 +992,24 @@ declare <7 x i32> @llvm.fptoui.sat.v7f16.v7i32 (<7 x half>)
 declare <8 x i32> @llvm.fptoui.sat.v8f16.v8i32 (<8 x half>)
 
 define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v1f16_v1i32:
-; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    ret
+; CHECK-SD-CVT-LABEL: test_unsigned_v1f16_v1i32:
+; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    fcvt s0, h0
+; CHECK-SD-CVT-NEXT:    fcvtzu w8, s0
+; CHECK-SD-CVT-NEXT:    fmov s0, w8
+; CHECK-SD-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_unsigned_v1f16_v1i32:
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fcvtzu w8, h0
 ; CHECK-FP16-NEXT:    fmov s0, w8
 ; CHECK-FP16-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v1f16_v1i32:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzu s0, s0
+; CHECK-GI-CVT-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
 }
diff --git a/llvm/test/CodeGen/AArch64/ldst-prepost-uses.ll b/llvm/test/CodeGen/AArch64/ldst-prepost-uses.ll
new file mode 100644
index 0000000..85991fb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ldst-prepost-uses.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -O3 -mtriple=aarch64 | FileCheck %s
+
+; From #164775, this generates a pre-index load feeding a post-index store, that
+; was checking the wrong uses for post-inc. It seems quite delicate for it to
+; generate this combination at the wrong point to hit the same issue.
+
+@g_260 = dso_local global i16 0
+@g_480 = dso_local global i16 0
+
+define i32 @func_1(ptr %l_3253) {
+; CHECK-LABEL: func_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #128
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov w9, #2 // =0x2
+; CHECK-NEXT:    mov w10, #96 // =0x60
+; CHECK-NEXT:    strb wzr, [x9]
+; CHECK-NEXT:    mov w9, #111 // =0x6f
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    str wzr, [x9]
+; CHECK-NEXT:    mov w9, #80 // =0x50
+; CHECK-NEXT:    adrp x1, .L_MergedGlobals
+; CHECK-NEXT:    add x1, x1, :lo12:.L_MergedGlobals
+; CHECK-NEXT:    strh wzr, [x8]
+; CHECK-NEXT:    str q0, [x9]
+; CHECK-NEXT:    mov w9, #48 // =0x30
+; CHECK-NEXT:    str q0, [x9]
+; CHECK-NEXT:    mov w9, #32 // =0x20
+; CHECK-NEXT:    str q0, [x10]
+; CHECK-NEXT:    mov w10, #64 // =0x40
+; CHECK-NEXT:    str q0, [x9]
+; CHECK-NEXT:    mov w9, #16 // =0x10
+; CHECK-NEXT:    str q0, [x10]
+; CHECK-NEXT:    str q0, [x9]
+; CHECK-NEXT:    str q0, [x8]
+; CHECK-NEXT:    adrp x8, .L_MergedGlobals
+; CHECK-NEXT:    strb wzr, [x0, #8]
+; CHECK-NEXT:    strb wzr, [x0, #12]
+; CHECK-NEXT:    strb wzr, [x0, #16]
+; CHECK-NEXT:    strb wzr, [x0, #20]
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ldrh wzr, [x8, :lo12:.L_MergedGlobals]
+; CHECK-NEXT:    ldrh w8, [x1, #4]!
+; CHECK-NEXT:    sub w8, w8, #1
+; CHECK-NEXT:    strh w8, [x1]
+; CHECK-NEXT:    add sp, sp, #128
+; CHECK-NEXT:    b use
+entry:
+  %l_32531.sroa.3 = alloca [3 x i8], align 4
+  %l_32531.sroa.4 = alloca [115 x i8], align 4
+  call void @llvm.lifetime.start.p0(ptr %l_32531.sroa.3)
+  call void @llvm.lifetime.start.p0(ptr %l_32531.sroa.4)
+  call void @llvm.memset.p0.i64(ptr null, i8 0, i64 3, i1 false)
+  call void @llvm.memset.p0.i64(ptr null, i8 0, i64 115, i1 false)
+  %0 = getelementptr inbounds i8, ptr %l_3253, i64 8
+  store i8 0, ptr %0, align 4
+  %1 = getelementptr inbounds i8, ptr %l_3253, i64 12
+  store i8 0, ptr %1, align 4
+  %2 = getelementptr inbounds i8, ptr %l_3253, i64 16
+  store i8 0, ptr %2, align 4
+  %3 = getelementptr inbounds i8, ptr %l_3253, i64 20
+  store i8 0, ptr %3, align 4
+  %4 = load volatile i16, ptr @g_260, align 4
+  %5 = load i16, ptr @g_480, align 4
+  %dec.i.i = add i16 %5, -1
+  store i16 %dec.i.i, ptr @g_480, align 4
+  %call1 = tail call i32 @use(i32 0, ptr @g_480)
+  ret i32 %call1
+}
+
+declare i32 @use(i32, ptr)
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll
index 06c53d8..b17b48c 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll
@@ -86,4 +86,111 @@ define i64 @sme_cntsd_mul() {
   ret i64 %res
 }
 
-declare i64 @llvm.aarch64.sme.cntsd()
+define i64 @sme_cntsb_mul_pos() {
+; CHECK-LABEL: sme_cntsb_mul_pos:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdsvl x8, #24
+; CHECK-NEXT:    lsl x0, x8, #2
+; CHECK-NEXT:    ret
+  %v = call i64 @llvm.aarch64.sme.cntsd()
+  %shl = shl nuw nsw i64 %v, 3
+  %res = mul nuw nsw i64 %shl, 96
+  ret i64 %res
+}
+
+define i64 @sme_cntsh_mul_pos() {
+; CHECK-LABEL: sme_cntsh_mul_pos:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdsvl x8, #3
+; CHECK-NEXT:    lsr x0, x8, #1
+; CHECK-NEXT:    ret
+  %v = call i64 @llvm.aarch64.sme.cntsd()
+  %shl = shl nuw nsw i64 %v, 2
+  %res = mul nuw nsw i64 %shl, 3
+  ret i64 %res
+}
+
+define i64 @sme_cntsw_mul_pos() {
+; CHECK-LABEL: sme_cntsw_mul_pos:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdsvl x8, #31
+; CHECK-NEXT:    lsr x0, x8, #1
+; CHECK-NEXT:    ret
+  %v = call i64 @llvm.aarch64.sme.cntsd()
+  %shl = shl nuw nsw i64 %v, 1
+  %res = mul nuw nsw i64 %shl, 62
+  ret i64 %res
+}
+
+define i64 @sme_cntsd_mul_pos() {
+; CHECK-LABEL: sme_cntsd_mul_pos:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdsvl x8, #31
+; CHECK-NEXT:    lsl x0, x8, #2
+; CHECK-NEXT:    ret
+  %v = call i64 @llvm.aarch64.sme.cntsd()
+  %res = mul nuw nsw i64 %v, 992
+  ret i64 %res
+}
+
+define i64 @sme_cntsb_mul_neg() {
+; CHECK-LABEL: sme_cntsb_mul_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdsvl x8, #-24
+; CHECK-NEXT:    lsl x0, x8, #2
+; CHECK-NEXT:    ret
+  %v = call i64 @llvm.aarch64.sme.cntsd()
+  %shl = shl nuw nsw i64 %v, 3
+  %res = mul nuw nsw i64 %shl, -96
+  ret i64 %res
+}
+
+define i64 @sme_cntsh_mul_neg() {
+; CHECK-LABEL: sme_cntsh_mul_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdsvl x8, #-3
+; CHECK-NEXT:    lsr x0, x8, #1
+; CHECK-NEXT:    ret
+  %v = call i64 @llvm.aarch64.sme.cntsd()
+  %shl = shl nuw nsw i64 %v, 2
+  %res = mul nuw nsw i64 %shl, -3
+  ret i64 %res
+}
+
+define i64 @sme_cntsw_mul_neg() {
+; CHECK-LABEL: sme_cntsw_mul_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdsvl x8, #-31
+; CHECK-NEXT:    lsl x0, x8, #3
+; CHECK-NEXT:    ret
+  %v = call i64 @llvm.aarch64.sme.cntsd()
+  %shl = shl nuw nsw i64 %v, 1
+  %res = mul nuw nsw i64 %shl, -992
+  ret i64 %res
+}
+
+define i64 @sme_cntsd_mul_neg() {
+; CHECK-LABEL: sme_cntsd_mul_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdsvl x8, #-3
+; CHECK-NEXT:    lsr x0, x8, #3
+; CHECK-NEXT:    ret
+  %v = call i64 @llvm.aarch64.sme.cntsd()
+  %res = mul nuw nsw  i64 %v, -3
+  ret i64 %res
+}
+
+; Negative test for optimization failure
+define i64 @sme_cntsd_mul_fail() {
+; CHECK-LABEL: sme_cntsd_mul_fail:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov w9, #993 // =0x3e1
+; CHECK-NEXT:    lsr x8, x8, #3
+; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    ret
+  %v = call i64 @llvm.aarch64.sme.cntsd()
+  %res = mul nuw nsw i64 %v, 993
+  ret i64 %res
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
index c8bd8ab..423ce82 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
@@ -18,58 +18,12 @@ body: |
     ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
-    %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1)
+    %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1
     $vgpr0_vgpr1 = COPY %2
 
 ...
 
 ---
-name: shufflevector_scalar_src_dst
-tracksRegLiveness: true
-
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1
-
-    ; CHECK-LABEL: name: shufflevector_scalar_src_dst
-    ; CHECK: liveins: $vgpr0, $vgpr1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
-    %0:_(s32) = COPY $vgpr0
-    %1:_(s32) = COPY $vgpr1
-    %2:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1)
-    $vgpr0 = COPY %2
-
-...
-
----
-name: shufflevector_scalar_dst
-tracksRegLiveness: true
-
-body: |
-  bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
-
-    ; CHECK-LABEL: name: shufflevector_scalar_dst
-    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32)
-    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    %2:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2)
-    $vgpr0 = COPY %2
-
-...
-
----
 name: shufflevector_v2s32_0_1
 tracksRegLiveness: true
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir
index 6e4c6bc..31e3d97 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir
@@ -147,15 +147,17 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
-    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load (<8 x s16>), align 8, addrspace 3)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<8 x s16>)
-    ; CHECK-NEXT: G_STORE [[UV4]](s16), [[COPY1]](p3) :: (store (s16), addrspace 3)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p3) :: (load (s16), addrspace 3)
+    ; CHECK-NEXT: G_STORE [[LOAD]](s16), [[COPY1]](p3) :: (store (s16), addrspace 3)
     ; CHECK-NEXT: SI_RETURN
     %0:_(p3) = COPY $vgpr0
     %1:_(p3) = COPY $vgpr1
     %2:_(<8 x s16>) = G_IMPLICIT_DEF
     %3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
-    %4:_(s16) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4)
+    %idx:_(s32) = G_CONSTANT i32 4
+    %4:_(s16) = G_EXTRACT_VECTOR_ELT %3(<8 x s16>), %idx
     G_STORE %4(s16), %1(p3) :: (store (s16), addrspace 3)
     SI_RETURN
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shufflevector-pointer-crash.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/shufflevector-pointer-crash.mir
index ac903ad..e7bba9d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shufflevector-pointer-crash.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shufflevector-pointer-crash.mir
@@ -19,15 +19,15 @@ body: |
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
     ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[BITCAST]](<2 x p0>)
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[UV]](p0)
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
-    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p0)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p0)
     ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32)
     ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32)
     ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
     %0:_(p0) = G_CONSTANT i64 0
     %1:_(<2 x p0>) = G_BUILD_VECTOR %0:_(p0), %0:_(p0)
     %2:_(<2 x p0>) = G_LOAD %0:_(p0) :: (load (<2 x p0>))
-    %3:_(p0) = G_SHUFFLE_VECTOR %2:_(<2 x p0>), %1:_, shufflemask(0)
+    %idx:_(s32) = G_CONSTANT i32 0
+    %3:_(p0) = G_EXTRACT_VECTOR_ELT %2:_(<2 x p0>), %idx
     %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %3:_(p0)
     $vgpr0 = COPY %4:_(s32)
     $vgpr1 = COPY %5:_(s32)
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
index 25119fe..7df230c 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -441,8 +441,7 @@ entry:
 define i32 @test_shufflevector_s32_v2s32(i32 %arg) {
 ; CHECK-LABEL: name: test_shufflevector_s32_v2s32
 ; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $r0
-; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], shufflemask(0, 0)
+; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ARG]](s32), [[ARG]](s32)
 ; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>)
   %vec = insertelement <1 x i32> undef, i32 %arg, i32 0
   %shuffle = shufflevector <1 x i32> %vec, <1 x i32> undef, <2 x i32> zeroinitializer
@@ -453,8 +452,7 @@ define i32 @test_shufflevector_s32_v2s32(i32 %arg) {
 define i32 @test_shufflevector_s32_s32_s32(i32 %arg) {
 ; CHECK-LABEL: name: test_shufflevector_s32_s32_s32
 ; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $r0
-; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-; CHECK: [[VEC:%[0-9]+]]:_(s32) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], shufflemask(0)
+; CHECK: r0 = COPY [[ARG]](s32)
   %vec = insertelement <1 x i32> undef, i32 %arg, i32 0
   %shuffle = shufflevector <1 x i32> %vec, <1 x i32> undef, <1 x i32> zeroinitializer
   %res = extractelement <1 x i32> %shuffle, i32 0
diff --git a/llvm/test/CodeGen/MIR/AArch64/parse-shufflemask-invalid-scalar.mir b/llvm/test/CodeGen/MIR/AArch64/parse-shufflemask-invalid-scalar.mir
new file mode 100644
index 0000000..6c6f9b6
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AArch64/parse-shufflemask-invalid-scalar.mir
@@ -0,0 +1,15 @@
+# RUN: not llc -mtriple=aarch64-- -run-pass=none -o /dev/null %s 2>&1 | FileCheck %s
+
+---
+name: test_shuffle_0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0
+    %0:_(<2 x s32>) = COPY $d0
+    %2:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[@LINE+1]]:67: shufflemask should have > 1 element
+    %1:_(s32) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(0)
+    $w0 = COPY %1
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/MIR/AArch64/parse-shufflemask.mir b/llvm/test/CodeGen/MIR/AArch64/parse-shufflemask.mir
index c529d63..4cc3bc6 100644
--- a/llvm/test/CodeGen/MIR/AArch64/parse-shufflemask.mir
+++ b/llvm/test/CodeGen/MIR/AArch64/parse-shufflemask.mir
@@ -122,54 +122,3 @@ body:             |
     RET_ReallyLR implicit $d0
 
 ...
-
-# CHECK-LABEL: name: test_shuffle_0
-# CHECK: G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(0)
----
-name: test_shuffle_0
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $d0
-
-    %0:_(<2 x s32>) = COPY $d0
-    %2:_(<2 x s32>) = G_IMPLICIT_DEF
-    %1:_(s32) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(0)
-    $w0 = COPY %1
-    RET_ReallyLR implicit $w0
-
-...
-
-# CHECK-LABEL: name: test_shuffle_1
-# CHECK: G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1)
----
-name: test_shuffle_1
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $d0
-
-    %0:_(<2 x s32>) = COPY $d0
-    %2:_(<2 x s32>) = G_IMPLICIT_DEF
-    %1:_(s32) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(1)
-    $w0 = COPY %1
-    RET_ReallyLR implicit $w0
-
-...
-
-# CHECK-LABEL: name: test_shuffle_undef
-# CHECK: G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(undef)
----
-name: test_shuffle_undef
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $d0
-
-    %0:_(<2 x s32>) = COPY $d0
-    %2:_(<2 x s32>) = G_IMPLICIT_DEF
-    %1:_(s32) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(undef)
-    $w0 = COPY %1
-    RET_ReallyLR implicit $w0
-
-...
diff --git a/llvm/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll b/llvm/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
index f82cd11..f12fc4a8 100644
--- a/llvm/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
+++ b/llvm/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
@@ -23,6 +23,6 @@ entry:
 
 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) #1
 
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-sitofp.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-sitofp.mir
index 64e569c..3d40240 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-sitofp.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-sitofp.mir
@@ -63,7 +63,7 @@
     ret double %conv
   }
 
-  attributes #0 = { norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
   !llvm.module.flags = !{!0}
   !llvm.ident = !{!1}
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-sitofp.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-sitofp.mir
index 0bb6061..8f76ad5 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-sitofp.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-sitofp.mir
@@ -35,7 +35,7 @@
     ret double %conv
   }
 
-  attributes #0 = { norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
   !llvm.module.flags = !{!0}
   !llvm.ident = !{!1}
diff --git a/llvm/test/CodeGen/X86/StackColoring-use-between-allocas.mir b/llvm/test/CodeGen/X86/StackColoring-use-between-allocas.mir
index a992222..1612485 100644
--- a/llvm/test/CodeGen/X86/StackColoring-use-between-allocas.mir
+++ b/llvm/test/CodeGen/X86/StackColoring-use-between-allocas.mir
@@ -64,9 +64,9 @@
   ; Function Attrs: nounwind
   declare void @llvm.stackprotector(ptr, ptr) #3
 
-  attributes #0 = { ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="64" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="64" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "use-soft-float"="false" }
   attributes #1 = { argmemonly nounwind willreturn }
-  attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "use-soft-float"="false" }
   attributes #3 = { nounwind }
 
   !llvm.module.flags = !{!0, !1}
diff --git a/llvm/test/CodeGen/X86/atom-fixup-lea4.ll b/llvm/test/CodeGen/X86/atom-fixup-lea4.ll
index 8e7a463..69689f0 100644
--- a/llvm/test/CodeGen/X86/atom-fixup-lea4.ll
+++ b/llvm/test/CodeGen/X86/atom-fixup-lea4.ll
@@ -18,5 +18,5 @@ entry:
 ; Function Attrs: uwtable
 declare void @_ZN12ValueWrapperIS_IS_IdEEEC2Ev(ptr) unnamed_addr #0 align 2
 
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
diff --git a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change.mir b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change.mir
index 99fee27..88d7682 100644
--- a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change.mir
+++ b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change.mir
@@ -52,7 +52,7 @@
   ; Function Attrs: nounwind
   declare void @llvm.stackprotector(ptr, ptr) #2
 
-  attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
   attributes #1 = { nounwind readnone speculatable }
   attributes #2 = { nounwind }
 
diff --git a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change2.mir b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change2.mir
index 50b2433..8dbd4e2 100644
--- a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change2.mir
+++ b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change2.mir
@@ -63,7 +63,7 @@
   ; Function Attrs: nounwind
   declare void @llvm.stackprotector(ptr, ptr) #2
 
-  attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
   attributes #1 = { nounwind readnone speculatable }
   attributes #2 = { nounwind }
 
diff --git a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change3.mir b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change3.mir
index 7a4b993..c0924ea 100644
--- a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change3.mir
+++ b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change3.mir
@@ -73,7 +73,7 @@
   ; Function Attrs: nounwind
   declare void @llvm.stackprotector(ptr, ptr) #2
   
-  attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
   attributes #1 = { nounwind readnone speculatable }
   attributes #2 = { nounwind }
   
diff --git a/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll b/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll
index da9d16c..f074390 100644
--- a/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll
+++ b/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll
@@ -502,6 +502,6 @@ entry:
   ret void
 }
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
index b4ba239..7fd4f59 100644
--- a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
+++ b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
@@ -48,5 +48,5 @@ entry:
 ; Function Attrs: nounwind readnone
 declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32) #1
 
-attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/X86/bit-piece-comment.ll b/llvm/test/CodeGen/X86/bit-piece-comment.ll
index d74863f..85c64a7 100644
--- a/llvm/test/CodeGen/X86/bit-piece-comment.ll
+++ b/llvm/test/CodeGen/X86/bit-piece-comment.ll
@@ -32,7 +32,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
-attributes #0 = { norecurse nounwind optsize readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind optsize readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/CodeGen/X86/catchpad-regmask.ll b/llvm/test/CodeGen/X86/catchpad-regmask.ll
index 9dba897..713d015 100644
--- a/llvm/test/CodeGen/X86/catchpad-regmask.ll
+++ b/llvm/test/CodeGen/X86/catchpad-regmask.ll
@@ -130,7 +130,7 @@ unreachable:                                      ; preds = %entry
 ; CHECK: retq                            # CATCHRET
 
 
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #1 = { noreturn }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/X86/catchpad-weight.ll b/llvm/test/CodeGen/X86/catchpad-weight.ll
index e97f358..699243d 100644
--- a/llvm/test/CodeGen/X86/catchpad-weight.ll
+++ b/llvm/test/CodeGen/X86/catchpad-weight.ll
@@ -74,8 +74,8 @@ declare void @"\01??1HasDtor@@QEAA@XZ"(ptr) #3
 ; Function Attrs: nounwind argmemonly
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1
 
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #1 = { nounwind argmemonly }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "use-soft-float"="false" }
+attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #4 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/clang-section-coff.ll b/llvm/test/CodeGen/X86/clang-section-coff.ll
index 02381fd..6b76bb6 100644
--- a/llvm/test/CodeGen/X86/clang-section-coff.ll
+++ b/llvm/test/CodeGen/X86/clang-section-coff.ll
@@ -37,8 +37,8 @@ attributes #0 = { "bss-section"="my_bss.1" "data-section"="my_data.1" "rodata-se
 attributes #1 = { "data-section"="my_data.1" "rodata-section"="my_rodata.1" }
 attributes #2 = { "bss-section"="my_bss.2" "rodata-section"="my_rodata.1" }
 attributes #3 = { "bss-section"="my_bss.2" "data-section"="my_data.2" "rodata-section"="my_rodata.2" }
-attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #7 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #7 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 
diff --git a/llvm/test/CodeGen/X86/cleanuppad-inalloca.ll b/llvm/test/CodeGen/X86/cleanuppad-inalloca.ll
index 01e7019..863f580 100644
--- a/llvm/test/CodeGen/X86/cleanuppad-inalloca.ll
+++ b/llvm/test/CodeGen/X86/cleanuppad-inalloca.ll
@@ -65,4 +65,4 @@ declare i32 @__CxxFrameHandler3(...)
 
 declare x86_thiscallcc void @"\01??1A@@QAE@XZ"(ptr) #0
 
-attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/combine-adc.ll b/llvm/test/CodeGen/X86/combine-adc.ll
index 2241736..a2aaea3 100644
--- a/llvm/test/CodeGen/X86/combine-adc.ll
+++ b/llvm/test/CodeGen/X86/combine-adc.ll
@@ -89,4 +89,52 @@ define i32 @adc_merge_constants(i32 %a0) nounwind {
   ret i32 %sum
 }
 
+define i32 @adc_merge_sub(i32 %a0) nounwind {
+; X86-LABEL: adc_merge_sub:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    addl $42, %edi
+; X86-NEXT:    setb %al
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    negl %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll use@PLT
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    xorl %edi, %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    retl
+;
+; X64-LABEL: adc_merge_sub:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    addl $42, %ebx
+; X64-NEXT:    setb %dil
+; X64-NEXT:    movl %ebx, %ebp
+; X64-NEXT:    negl %ebp
+; X64-NEXT:    callq use@PLT
+; X64-NEXT:    xorl %ebx, %ebp
+; X64-NEXT:    movl %ebp, %eax
+; X64-NEXT:    addq $8, %rsp
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+  %adc = tail call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %a0, i32 42)
+  %carry = extractvalue { i8, i32 } %adc, 0
+  call void @use(i8 %carry)
+  %sum = extractvalue { i8, i32 } %adc, 1
+  %sub = sub i32 -42, %a0
+  %result = xor i32 %sum, %sub
+  ret i32 %result
+}
+
 declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32)
+declare void @use(i8)
diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll
index 89aee96..62744d4 100644
--- a/llvm/test/CodeGen/X86/combine-sbb.ll
+++ b/llvm/test/CodeGen/X86/combine-sbb.ll
@@ -333,4 +333,85 @@ define i32 @PR40483_sub6(ptr, i32) nounwind {
   ret i32 %10
 }
 
+define i32 @sbb_merge_add1(i32 %a0) nounwind {
+; X86-LABEL: sbb_merge_add1:
+; X86:       # %bb.0:
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    cmpl $42, {{[0-9]+}}(%esp)
+; X86-NEXT:    setb %al
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll use@PLT
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: sbb_merge_add1:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpl $42, %edi
+; X64-NEXT:    setb %al
+; X64-NEXT:    movl %eax, %edi
+; X64-NEXT:    callq use@PLT
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %a0, i32 42)
+  %borrow = extractvalue { i8, i32 } %sbb, 0
+  call void @use(i8 %borrow)
+  %diff = extractvalue { i8, i32 } %sbb, 1
+  %add = add i32 %a0, -42
+  %result = xor i32 %diff, %add
+  ret i32 %result
+}
+
+define i32 @sbb_merge_add2(i32 %a0) nounwind {
+; X86-LABEL: sbb_merge_add2:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl $42, %edi
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    setb %al
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    negl %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll use@PLT
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    xorl %edi, %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    retl
+;
+; X64-LABEL: sbb_merge_add2:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $42, %ebp
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    subl %edi, %ebp
+; X64-NEXT:    setb %al
+; X64-NEXT:    movl %ebp, %ebx
+; X64-NEXT:    negl %ebx
+; X64-NEXT:    movl %eax, %edi
+; X64-NEXT:    callq use@PLT
+; X64-NEXT:    xorl %ebp, %ebx
+; X64-NEXT:    movl %ebx, %eax
+; X64-NEXT:    addq $8, %rsp
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+  %sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 42, i32 %a0)
+  %borrow = extractvalue { i8, i32 } %sbb, 0
+  call void @use(i8 %borrow)
+  %diff = extractvalue { i8, i32 } %sbb, 1
+  %add = add i32 %a0, -42
+  %result = xor i32 %diff, %add
+  ret i32 %result
+}
+
 declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32)
+declare void @use(i8)
diff --git a/llvm/test/CodeGen/X86/complex-fastmath.ll b/llvm/test/CodeGen/X86/complex-fastmath.ll
index 29a37a1..21bb64a 100644
--- a/llvm/test/CodeGen/X86/complex-fastmath.ll
+++ b/llvm/test/CodeGen/X86/complex-fastmath.ll
@@ -212,4 +212,4 @@ define <2 x double> @complex_mul_f64(<2 x double>, <2 x double>) #0 {
   ret <2 x double> %14
 }
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "unsafe-fp-math"="true"  }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true"  }
diff --git a/llvm/test/CodeGen/X86/crash-lre-eliminate-dead-def.ll b/llvm/test/CodeGen/X86/crash-lre-eliminate-dead-def.ll
index ddddcfa..9f51fa4 100644
--- a/llvm/test/CodeGen/X86/crash-lre-eliminate-dead-def.ll
+++ b/llvm/test/CodeGen/X86/crash-lre-eliminate-dead-def.ll
@@ -264,5 +264,5 @@ unreachable:                                      ; preds = %cleanup100
 ; Function Attrs: nounwind
 declare void @printf(ptr nocapture readonly, ...) #1
 
-attributes #0 = { noreturn nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noreturn nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/dag-optnone.ll b/llvm/test/CodeGen/X86/dag-optnone.ll
index 66e4c1d..022694e 100644
--- a/llvm/test/CodeGen/X86/dag-optnone.ll
+++ b/llvm/test/CodeGen/X86/dag-optnone.ll
@@ -28,7 +28,7 @@
 ; a repeated fadd that can be combined into an fmul.  We show that this
 ; happens in both the non-optnone function and the optnone function.
 
-define float @foo(float %x, ...) #0 {
+define float @foo(float %x, ...) {
 entry:
   %add = fadd fast float %x, %x
   %add1 = fadd fast float %add, %x
@@ -68,5 +68,4 @@ entry:
   ret void
 }
 
-attributes #0 = { "unsafe-fp-math"="true" }
-attributes #1 = { noinline optnone "unsafe-fp-math"="true" }
+attributes #1 = { noinline optnone }
diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
index b428ce4..71ad598 100644
--- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
+++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
@@ -96,6 +96,17 @@ entry:
 define void @_Z2x6v() local_unnamed_addr {
 ; CHECK-LABEL: _Z2x6v:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq x1@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movl (%rax), %edx
+; CHECK-NEXT:    andl $511, %edx # imm = 0x1FF
+; CHECK-NEXT:    leaq 1(%rdx), %rax
+; CHECK-NEXT:    movq x4@GOTPCREL(%rip), %rcx
+; CHECK-NEXT:    movl %eax, (%rcx)
+; CHECK-NEXT:    movq x3@GOTPCREL(%rip), %rcx
+; CHECK-NEXT:    movl (%rcx), %ecx
+; CHECK-NEXT:    testl %ecx, %ecx
+; CHECK-NEXT:    je .LBB1_18
+; CHECK-NEXT:  # %bb.1: # %for.cond1thread-pre-split.lr.ph
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    pushq %r15
@@ -114,58 +125,47 @@ define void @_Z2x6v() local_unnamed_addr {
 ; CHECK-NEXT:    .cfi_offset %r14, -32
 ; CHECK-NEXT:    .cfi_offset %r15, -24
 ; CHECK-NEXT:    .cfi_offset %rbp, -16
-; CHECK-NEXT:    movq x1@GOTPCREL(%rip), %rax
-; CHECK-NEXT:    movl (%rax), %ebx
-; CHECK-NEXT:    andl $511, %ebx # imm = 0x1FF
-; CHECK-NEXT:    leaq 1(%rbx), %rax
-; CHECK-NEXT:    movq x4@GOTPCREL(%rip), %rcx
-; CHECK-NEXT:    movl %eax, (%rcx)
-; CHECK-NEXT:    movq x3@GOTPCREL(%rip), %rcx
-; CHECK-NEXT:    movl (%rcx), %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
-; CHECK-NEXT:    je .LBB1_18
-; CHECK-NEXT:  # %bb.1: # %for.cond1thread-pre-split.lr.ph
-; CHECK-NEXT:    movq x5@GOTPCREL(%rip), %rdx
-; CHECK-NEXT:    movq (%rdx), %rsi
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    notl %edx
-; CHECK-NEXT:    leaq 8(,%rdx,8), %rdi
+; CHECK-NEXT:    movq x5@GOTPCREL(%rip), %rsi
+; CHECK-NEXT:    movq (%rsi), %rsi
+; CHECK-NEXT:    movl %ecx, %edi
+; CHECK-NEXT:    notl %edi
+; CHECK-NEXT:    leaq 8(,%rdi,8), %rdi
 ; CHECK-NEXT:    imulq %rax, %rdi
 ; CHECK-NEXT:    addq %rsi, %rdi
 ; CHECK-NEXT:    movq x2@GOTPCREL(%rip), %r8
-; CHECK-NEXT:    movl (%r8), %edx
-; CHECK-NEXT:    leal 8(,%rbx,8), %eax
+; CHECK-NEXT:    movl (%r8), %r9d
+; CHECK-NEXT:    leal 8(,%rdx,8), %eax
 ; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    leaq 32(%rsi), %r11
-; CHECK-NEXT:    leaq 8(,%rbx,8), %rbx
-; CHECK-NEXT:    xorl %r14d, %r14d
-; CHECK-NEXT:    movq x0@GOTPCREL(%rip), %r15
-; CHECK-NEXT:    movq %rsi, %r12
+; CHECK-NEXT:    leaq 32(%rsi), %rbx
+; CHECK-NEXT:    leaq 8(,%rdx,8), %r14
+; CHECK-NEXT:    xorl %r15d, %r15d
+; CHECK-NEXT:    movq x0@GOTPCREL(%rip), %r12
+; CHECK-NEXT:    movq %rsi, %r13
 ; CHECK-NEXT:    jmp .LBB1_2
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB1_15: # %for.cond1.for.inc3_crit_edge
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    movl %edx, (%r8)
+; CHECK-NEXT:    movl %r9d, (%r8)
 ; CHECK-NEXT:  .LBB1_16: # %for.inc3
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    addq %rbx, %r12
-; CHECK-NEXT:    incq %r14
-; CHECK-NEXT:    addq %rbx, %r11
+; CHECK-NEXT:    addq %r14, %r13
+; CHECK-NEXT:    incq %r15
+; CHECK-NEXT:    addq %r14, %rbx
 ; CHECK-NEXT:    incl %ecx
 ; CHECK-NEXT:    je .LBB1_17
 ; CHECK-NEXT:  .LBB1_2: # %for.cond1thread-pre-split
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB1_12 Depth 2
 ; CHECK-NEXT:    # Child Loop BB1_14 Depth 2
-; CHECK-NEXT:    testl %edx, %edx
+; CHECK-NEXT:    testl %r9d, %r9d
 ; CHECK-NEXT:    jns .LBB1_16
 ; CHECK-NEXT:  # %bb.3: # %for.body2.preheader
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    movslq %edx, %r13
-; CHECK-NEXT:    testq %r13, %r13
+; CHECK-NEXT:    movslq %r9d, %r9
+; CHECK-NEXT:    testq %r9, %r9
 ; CHECK-NEXT:    movq $-1, %rbp
-; CHECK-NEXT:    cmovnsq %r13, %rbp
-; CHECK-NEXT:    subq %r13, %rbp
+; CHECK-NEXT:    cmovnsq %r9, %rbp
+; CHECK-NEXT:    subq %r9, %rbp
 ; CHECK-NEXT:    incq %rbp
 ; CHECK-NEXT:    cmpq $4, %rbp
 ; CHECK-NEXT:    jb .LBB1_14
@@ -177,20 +177,20 @@ define void @_Z2x6v() local_unnamed_addr {
 ; CHECK-NEXT:  # %bb.5: # %vector.memcheck
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
 ; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; CHECK-NEXT:    imulq %r14, %rax
-; CHECK-NEXT:    leaq (%rsi,%rax), %r10
-; CHECK-NEXT:    leaq (%r10,%r13,8), %r9
-; CHECK-NEXT:    testq %r13, %r13
-; CHECK-NEXT:    movq $-1, %r10
-; CHECK-NEXT:    cmovnsq %r13, %r10
-; CHECK-NEXT:    cmpq %r15, %r9
+; CHECK-NEXT:    imulq %r15, %rax
+; CHECK-NEXT:    leaq (%rsi,%rax), %r11
+; CHECK-NEXT:    leaq (%r11,%r9,8), %r10
+; CHECK-NEXT:    testq %r9, %r9
+; CHECK-NEXT:    movq $-1, %r11
+; CHECK-NEXT:    cmovnsq %r9, %r11
+; CHECK-NEXT:    cmpq %r12, %r10
 ; CHECK-NEXT:    jae .LBB1_7
 ; CHECK-NEXT:  # %bb.6: # %vector.memcheck
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    leaq 8(%rsi), %r9
-; CHECK-NEXT:    addq %r9, %rax
-; CHECK-NEXT:    leaq (%rax,%r10,8), %rax
-; CHECK-NEXT:    cmpq %r15, %rax
+; CHECK-NEXT:    leaq 8(%rsi), %r10
+; CHECK-NEXT:    addq %r10, %rax
+; CHECK-NEXT:    leaq (%rax,%r11,8), %rax
+; CHECK-NEXT:    cmpq %r12, %rax
 ; CHECK-NEXT:    ja .LBB1_14
 ; CHECK-NEXT:  .LBB1_7: # %vector.body.preheader
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
@@ -201,50 +201,47 @@ define void @_Z2x6v() local_unnamed_addr {
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
 ; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; CHECK-NEXT:    movdqu %xmm0, (%r12,%r13,8)
-; CHECK-NEXT:    movdqu %xmm0, 16(%r12,%r13,8)
-; CHECK-NEXT:    movl $4, %r10d
+; CHECK-NEXT:    movdqu %xmm0, (%r13,%r9,8)
+; CHECK-NEXT:    movdqu %xmm0, 16(%r13,%r9,8)
+; CHECK-NEXT:    movl $4, %r11d
 ; CHECK-NEXT:    shrq $2, %rax
 ; CHECK-NEXT:    jne .LBB1_11
 ; CHECK-NEXT:    jmp .LBB1_13
 ; CHECK-NEXT:  .LBB1_8: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    xorl %r10d, %r10d
+; CHECK-NEXT:    xorl %r11d, %r11d
 ; CHECK-NEXT:    shrq $2, %rax
 ; CHECK-NEXT:    je .LBB1_13
 ; CHECK-NEXT:  .LBB1_11: # %vector.body.preheader.new
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
 ; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; CHECK-NEXT:    movq %r10, %rax
+; CHECK-NEXT:    movq %r11, %rax
 ; CHECK-NEXT:    subq %rdx, %rax
-; CHECK-NEXT:    addq %r13, %r10
-; CHECK-NEXT:    leaq (%r11,%r10,8), %r10
+; CHECK-NEXT:    addq %r9, %r11
+; CHECK-NEXT:    leaq (%rbx,%r11,8), %r11
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB1_12: # %vector.body
 ; CHECK-NEXT:    # Parent Loop BB1_2 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    movdqu %xmm0, -32(%r10)
-; CHECK-NEXT:    movdqu %xmm0, -16(%r10)
-; CHECK-NEXT:    movdqu %xmm0, (%r10)
-; CHECK-NEXT:    movdqu %xmm0, 16(%r10)
-; CHECK-NEXT:    addq $64, %r10
+; CHECK-NEXT:    movdqu %xmm0, -32(%r11)
+; CHECK-NEXT:    movdqu %xmm0, -16(%r11)
+; CHECK-NEXT:    movdqu %xmm0, (%r11)
+; CHECK-NEXT:    movdqu %xmm0, 16(%r11)
+; CHECK-NEXT:    addq $64, %r11
 ; CHECK-NEXT:    addq $8, %rax
 ; CHECK-NEXT:    jne .LBB1_12
 ; CHECK-NEXT:  .LBB1_13: # %middle.block
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    addq %rdx, %r13
+; CHECK-NEXT:    addq %rdx, %r9
 ; CHECK-NEXT:    cmpq %rdx, %rbp
-; CHECK-NEXT:    movq %r13, %rdx
 ; CHECK-NEXT:    je .LBB1_15
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB1_14: # %for.body2
 ; CHECK-NEXT:    # Parent Loop BB1_2 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    movq (%r15), %rax
-; CHECK-NEXT:    movq %rax, (%r12,%r13,8)
-; CHECK-NEXT:    leaq 1(%r13), %rdx
-; CHECK-NEXT:    cmpq $-1, %r13
-; CHECK-NEXT:    movq %rdx, %r13
+; CHECK-NEXT:    movq (%r12), %rax
+; CHECK-NEXT:    movq %rax, (%r13,%r9,8)
+; CHECK-NEXT:    incq %r9
 ; CHECK-NEXT:    jl .LBB1_14
 ; CHECK-NEXT:    jmp .LBB1_15
 ; CHECK-NEXT:  .LBB1_17: # %for.cond.for.end5_crit_edge
@@ -252,7 +249,6 @@ define void @_Z2x6v() local_unnamed_addr {
 ; CHECK-NEXT:    movq %rdi, (%rax)
 ; CHECK-NEXT:    movq x3@GOTPCREL(%rip), %rax
 ; CHECK-NEXT:    movl $0, (%rax)
-; CHECK-NEXT:  .LBB1_18: # %for.end5
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    popq %r12
@@ -265,6 +261,13 @@ define void @_Z2x6v() local_unnamed_addr {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_restore %rbx
+; CHECK-NEXT:    .cfi_restore %r12
+; CHECK-NEXT:    .cfi_restore %r13
+; CHECK-NEXT:    .cfi_restore %r14
+; CHECK-NEXT:    .cfi_restore %r15
+; CHECK-NEXT:    .cfi_restore %rbp
+; CHECK-NEXT:  .LBB1_18: # %for.end5
 ; CHECK-NEXT:    retq
 entry:
   %0 = load i32, ptr @x1, align 4
diff --git a/llvm/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll b/llvm/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
index deba5a8..18e5490 100644
--- a/llvm/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
+++ b/llvm/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
@@ -112,9 +112,9 @@ declare void @_Z3fooPcjPKc(ptr, i32, ptr) #2
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #3
 
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #3 = { nounwind readnone }
 attributes #4 = { nounwind }
 
diff --git a/llvm/test/CodeGen/X86/dbg-changes-codegen.ll b/llvm/test/CodeGen/X86/dbg-changes-codegen.ll
index fabdbbb..c688895 100644
--- a/llvm/test/CodeGen/X86/dbg-changes-codegen.ll
+++ b/llvm/test/CodeGen/X86/dbg-changes-codegen.ll
@@ -68,8 +68,8 @@ _ZN7Flibble3barEP6Wibble.exit:                    ; preds = %entry, %if.then.i
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
-attributes #0 = { nounwind readonly uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readonly uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 
 !1 = distinct !DISubprogram()
diff --git a/llvm/test/CodeGen/X86/dbg-combine.ll b/llvm/test/CodeGen/X86/dbg-combine.ll
index b3d2213..3ff5a26 100644
--- a/llvm/test/CodeGen/X86/dbg-combine.ll
+++ b/llvm/test/CodeGen/X86/dbg-combine.ll
@@ -63,7 +63,7 @@ declare ptr @llvm.stacksave() #2
 ; Function Attrs: nounwind
 declare void @llvm.stackrestore(ptr) #2
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
 
diff --git a/llvm/test/CodeGen/X86/debug-loclists-lto.ll b/llvm/test/CodeGen/X86/debug-loclists-lto.ll
index fde8e00..2bd927f 100644
--- a/llvm/test/CodeGen/X86/debug-loclists-lto.ll
+++ b/llvm/test/CodeGen/X86/debug-loclists-lto.ll
@@ -34,8 +34,8 @@ entry:
   ret void, !dbg !29
 }
 
-attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone speculatable willreturn }
 
 !llvm.dbg.cu = !{!0, !7}
diff --git a/llvm/test/CodeGen/X86/debugloc-argsize.ll b/llvm/test/CodeGen/X86/debugloc-argsize.ll
index 3cfeb6e..f4527c5 100644
--- a/llvm/test/CodeGen/X86/debugloc-argsize.ll
+++ b/llvm/test/CodeGen/X86/debugloc-argsize.ll
@@ -30,7 +30,7 @@ declare ptr @__cxa_begin_catch(ptr)
 
 declare void @__cxa_end_catch()
 
-attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "use-soft-float"="false" }
 attributes #1 = { optsize }
 attributes #2 = { nounwind }
 
diff --git a/llvm/test/CodeGen/X86/early-cfi-sections.ll b/llvm/test/CodeGen/X86/early-cfi-sections.ll
index 3a9e62a..8ab0340 100644
--- a/llvm/test/CodeGen/X86/early-cfi-sections.ll
+++ b/llvm/test/CodeGen/X86/early-cfi-sections.ll
@@ -12,7 +12,7 @@ entry:
   ret void, !dbg !8
 }
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4}
diff --git a/llvm/test/CodeGen/X86/fadd-combines.ll b/llvm/test/CodeGen/X86/fadd-combines.ll
index 2c06c53..a44671c 100644
--- a/llvm/test/CodeGen/X86/fadd-combines.ll
+++ b/llvm/test/CodeGen/X86/fadd-combines.ll
@@ -275,4 +275,4 @@ define <2 x double> @fmul2_negated_vec(<2 x double> %a, <2 x double> %b, <2 x do
   ret <2 x double> %sub
 }
 
-attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll b/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
index 5afa12c..1bc94b1 100644
--- a/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
+++ b/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
@@ -65,5 +65,5 @@ entry:
 declare i16 @llvm.convert.to.fp16.f64(double)
 declare i16 @llvm.convert.to.fp16.f80(x86_fp80)
 
-attributes #0 = { nounwind readnone "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone "use-soft-float"="false" }
+attributes #1 = { nounwind readnone "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/fdiv.ll b/llvm/test/CodeGen/X86/fdiv.ll
index 67bad09..859f54e 100644
--- a/llvm/test/CodeGen/X86/fdiv.ll
+++ b/llvm/test/CodeGen/X86/fdiv.ll
@@ -54,7 +54,7 @@ define double @denormal2(double %x) {
 
 ; Deleting the negates does not require unsafe-fp-math.
 
-define float @double_negative(float %x, float %y) #0 {
+define float @double_negative(float %x, float %y) {
 ; CHECK-LABEL: double_negative:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    divss %xmm1, %xmm0
@@ -65,7 +65,7 @@ define float @double_negative(float %x, float %y) #0 {
   ret float %div
 }
 
-define <4 x float> @double_negative_vector(<4 x float> %x, <4 x float> %y) #0 {
+define <4 x float> @double_negative_vector(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: double_negative_vector:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    divps %xmm1, %xmm0
@@ -80,7 +80,7 @@ define <4 x float> @double_negative_vector(<4 x float> %x, <4 x float> %y) #0 {
 ; clang/gcc), due to order of argument evaluation not being well defined. We
 ; ended up hitting llvm_unreachable in getNegatedExpression when building with
 ; gcc. Just make sure that we get a deterministic result.
-define float @fdiv_fneg_combine(float %a0, float %a1, float %a2) #0 {
+define float @fdiv_fneg_combine(float %a0, float %a1, float %a2) {
 ; CHECK-LABEL: fdiv_fneg_combine:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm0, %xmm3
@@ -99,6 +99,3 @@ define float @fdiv_fneg_combine(float %a0, float %a1, float %a2) #0 {
   %div5 = fdiv fast float %mul2, %sub4
   ret float %div5
 }
-
-attributes #0 = { "unsafe-fp-math"="false" }
-
diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
index 4c16cf9..0c3ec8d 100644
--- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
@@ -1021,7 +1021,7 @@ define <8 x double> @test_v8f64_interp_ninf(<8 x double> %x, <8 x double> %y, <8
 ; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
 ;
 
-define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
+define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
 ; FMA-LABEL: test_v16f32_fneg_fmadd:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm4
@@ -1044,7 +1044,7 @@ define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1,
   ret <16 x float> %neg
 }
 
-define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
+define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
 ; FMA-LABEL: test_v8f64_fneg_fmsub:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm4
@@ -1067,7 +1067,7 @@ define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <
   ret <8 x double> %neg
 }
 
-define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
+define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
 ; FMA-LABEL: test_v16f32_fneg_fnmadd:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm4
@@ -1091,7 +1091,7 @@ define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1,
   ret <16 x float> %neg1
 }
 
-define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
+define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
 ; FMA-LABEL: test_v8f64_fneg_fnmsub:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm4
@@ -1119,7 +1119,7 @@ define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1,
 ; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
 ;
 
-define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
+define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) {
 ; FMA-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
@@ -1146,7 +1146,7 @@ define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
 ; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
 ;
 
-define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) #0 {
+define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) {
 ; FMA-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
@@ -1171,7 +1171,7 @@ define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float
 
 ; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)
 
-define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0 {
+define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) {
 ; FMA-LABEL: test_v16f32_fneg_fmul:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vxorps %xmm4, %xmm4, %xmm4
@@ -1196,7 +1196,7 @@ define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0
   ret <16 x float> %n
 }
 
-define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
+define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) {
 ; FMA-LABEL: test_v8f64_fneg_fmul:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
@@ -1221,7 +1221,7 @@ define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
   ret <8 x double> %n
 }
 
-define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %y) #0 {
+define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %y) {
 ; FMA-LABEL: test_v8f64_fneg_fmul_no_nsz:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
@@ -1250,7 +1250,6 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %
   ret <8 x double> %n
 }
 
-attributes #0 = { "unsafe-fp-math"="true" }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; AVX512-INFS: {{.*}}
 ; FMA-INFS: {{.*}}
diff --git a/llvm/test/CodeGen/X86/fold-tied-op.ll b/llvm/test/CodeGen/X86/fold-tied-op.ll
index 5ea2964..d60d397 100644
--- a/llvm/test/CodeGen/X86/fold-tied-op.ll
+++ b/llvm/test/CodeGen/X86/fold-tied-op.ll
@@ -158,7 +158,7 @@ if.end:                                           ; preds = %if.else, %if.then
   ret i64 undef
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.ident = !{!0}
 
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
index bd32430..fc5279d0 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
@@ -100,40 +100,52 @@ entry:
   ret i32 %result
 }
 
-; These 2 divs only differ in their exception behavior and will be CSEd. Make
-; sure the nofpexcept flag is not set on the combined node.
+; These 4 divs only differ in their exception behavior. They form two groups,
+; whithin each the constrained functions have the same exception hehavior and
+; may be CSE'd. Instructions with different exception behavior belong to
+; different groups, they have different chain argument and cannot be CSE'd.
 define void @binop_cse(double %a, double %b, ptr %x, ptr %y) #0 {
 entry:
 ; CHECK-LABEL: name: binop_cse
-; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0)
-; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1, align 16)
-; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16)
-; CHECK: %3:fr64 = DIVSDrm [[MOVSDrm_alt]], %fixed-stack.2, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load (s64) from %fixed-stack.2)
-; CHECK: MOVSDmr killed [[MOV32rm1]], 1, $noreg, 0, $noreg, %3 :: (store (s64) into %ir.x, align 4)
-; CHECK: MOVSDmr killed [[MOV32rm]], 1, $noreg, 0, $noreg, %3 :: (store (s64) into %ir.y, align 4)
+; CHECK: [[Y:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0)
+; CHECK: [[X:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1, align 16)
+; CHECK: [[B:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2)
+; CHECK: [[A:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16)
+; CHECK: [[DIV0:%[0-9]+]]:fr64 = DIVSDrr [[A]], [[B]], implicit $mxcsr
+; CHECK: [[DIV1:%[0-9]+]]:fr64 = nofpexcept DIVSDrr [[A]], [[B]], implicit $mxcsr
+; CHECK: MOVSDmr killed [[X]], 1, $noreg, 0, $noreg, [[DIV1]] :: (store (s64) into %ir.x, align 4)
+; CHECK: MOVSDmr killed [[Y]], 1, $noreg, 0, $noreg, [[DIV1]] :: (store (s64) into %ir.y, align 4)
 ; CHECK: RET 0
   %div = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  %div1 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   %div2 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
-  store double %div, ptr %x
-  store double %div2, ptr %y
+  %div3 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  store double %div2, ptr %x
+  store double %div3, ptr %y
   ret void
 }
 
-; These 2 sitofps only differ in their exception behavior and will be CSEd. Make
-; sure the nofpexcept flag is not set on the combined node.
+; These 4 divs only differ in their exception behavior. They form two groups,
+; whithin each the constrained functions have the same exception hehavior and
+; may be CSE'd. Instructions with different exception behavior belong to
+; different groups, they have different chain argument and cannot be CSE'd.
 define void @sitofp_cse(i32 %a, ptr %x, ptr %y) #0 {
 entry:
 ; CHECK-LABEL: name: sitofp_cse
-; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 8)
-; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1)
-; CHECK: %2:fr64 = CVTSI2SDrm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.2, align 16)
-; CHECK: MOVSDmr killed [[MOV32rm1]], 1, $noreg, 0, $noreg, %2 :: (store (s64) into %ir.x, align 4)
-; CHECK: MOVSDmr killed [[MOV32rm]], 1, $noreg, 0, $noreg, %2 :: (store (s64) into %ir.y, align 4)
+; CHECK: [[Y:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 8)
+; CHECK: [[X:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1)
+; CHECK: [[A:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.2, align 16)
+; CHECK: [[CVT0:%[0-9]+]]:fr64 = CVTSI2SDrr [[A]]
+; CHECK: [[CVT1:%[0-9]+]]:fr64 = nofpexcept CVTSI2SDrr [[A]]
+; CHECK: MOVSDmr killed [[X]], 1, $noreg, 0, $noreg, [[CVT1]] :: (store (s64) into %ir.x, align 4)
+; CHECK: MOVSDmr killed [[Y]], 1, $noreg, 0, $noreg, [[CVT1]] :: (store (s64) into %ir.y, align 4)
 ; CHECK: RET 0
   %result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  %result1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   %result2 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
-  store double %result, ptr %x
-  store double %result2, ptr %y
+  %result3 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  store double %result2, ptr %x
+  store double %result3, ptr %y
   ret void
 }
 
diff --git a/llvm/test/CodeGen/X86/fp128-g.ll b/llvm/test/CodeGen/X86/fp128-g.ll
index 58a57d3..d2b956f 100644
--- a/llvm/test/CodeGen/X86/fp128-g.ll
+++ b/llvm/test/CodeGen/X86/fp128-g.ll
@@ -106,8 +106,8 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
-attributes #0 = { nounwind readonly uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { norecurse nounwind readonly uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readonly uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "use-soft-float"="false" }
+attributes #1 = { norecurse nounwind readonly uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!2}
diff --git a/llvm/test/CodeGen/X86/fp128-i128.ll b/llvm/test/CodeGen/X86/fp128-i128.ll
index f176a29..ef616ca 100644
--- a/llvm/test/CodeGen/X86/fp128-i128.ll
+++ b/llvm/test/CodeGen/X86/fp128-i128.ll
@@ -526,6 +526,6 @@ cleanup:                                          ; preds = %entry, %if.then
 }
 
 
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/X86/frame-order.ll b/llvm/test/CodeGen/X86/frame-order.ll
index dcbcb48..f410acf 100644
--- a/llvm/test/CodeGen/X86/frame-order.ll
+++ b/llvm/test/CodeGen/X86/frame-order.ll
@@ -74,9 +74,9 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare void @capture(ptr) #2
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #3 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/CodeGen/X86/fsafdo_test2.ll b/llvm/test/CodeGen/X86/fsafdo_test2.ll
index d83e241..fc4c1e8 100644
--- a/llvm/test/CodeGen/X86/fsafdo_test2.ll
+++ b/llvm/test/CodeGen/X86/fsafdo_test2.ll
@@ -196,10 +196,10 @@ if.end9.3:
 }
 
 
-attributes #0 = { noinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind willreturn }
-attributes #2 = { nofree noinline norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nofree noinline norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-soft-float"="false" }
+attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/CodeGen/X86/i386-shrink-wrapping.ll b/llvm/test/CodeGen/X86/i386-shrink-wrapping.ll
index 7bb3bf42..4347d62 100644
--- a/llvm/test/CodeGen/X86/i386-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/i386-shrink-wrapping.ll
@@ -130,5 +130,5 @@ for.end:                                          ; preds = %for.cond.preheader
 ; Function Attrs: nounwind
 declare i32 @varfunc(ptr nocapture readonly, ...) #0
 
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse" "use-soft-float"="false" }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/inline-asm-A-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-A-constraint.ll
index f982196..11a1f39 100644
--- a/llvm/test/CodeGen/X86/inline-asm-A-constraint.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-A-constraint.ll
@@ -23,7 +23,7 @@ entry:
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchg16b
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind }
 
 !llvm.ident = !{!0}
diff --git a/llvm/test/CodeGen/X86/label-annotation.ll b/llvm/test/CodeGen/X86/label-annotation.ll
index 626040c..05e4e87 100644
--- a/llvm/test/CodeGen/X86/label-annotation.ll
+++ b/llvm/test/CodeGen/X86/label-annotation.ll
@@ -77,8 +77,8 @@ entry:
 
 
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { inaccessiblememonly noduplicate nounwind }
 attributes #3 = { nounwind }
 
diff --git a/llvm/test/CodeGen/X86/label-heapallocsite.ll b/llvm/test/CodeGen/X86/label-heapallocsite.ll
index 31bca25..72834be6c 100644
--- a/llvm/test/CodeGen/X86/label-heapallocsite.ll
+++ b/llvm/test/CodeGen/X86/label-heapallocsite.ll
@@ -98,8 +98,8 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 ; CHECK-NEXT:  .short [[LABEL5]]-[[LABEL4]]
 ; CHECK-NEXT:  .long 4096
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone speculatable willreturn }
 attributes #3 = { nounwind }
 
diff --git a/llvm/test/CodeGen/X86/late-remat-update.mir b/llvm/test/CodeGen/X86/late-remat-update.mir
index 3212312..9108002 100644
--- a/llvm/test/CodeGen/X86/late-remat-update.mir
+++ b/llvm/test/CodeGen/X86/late-remat-update.mir
@@ -39,8 +39,8 @@
   ; Function Attrs: nounwind
   declare void @llvm.stackprotector(ptr, ptr) #2
   
-  attributes #0 = { noreturn uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-  attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { noreturn uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+  attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
   attributes #2 = { nounwind }
   
   !llvm.module.flags = !{!0, !1}
diff --git a/llvm/test/CodeGen/X86/lea-opt-memop-check-1.ll b/llvm/test/CodeGen/X86/lea-opt-memop-check-1.ll
index 5199b15..96780af0 100644
--- a/llvm/test/CodeGen/X86/lea-opt-memop-check-1.ll
+++ b/llvm/test/CodeGen/X86/lea-opt-memop-check-1.ll
@@ -92,4 +92,4 @@ if.end:
 ; CHECK:	pushl ([[REG3]])
 }
 
-attributes #0 = { nounwind optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/lifetime-alias.ll b/llvm/test/CodeGen/X86/lifetime-alias.ll
index 3efaccb..22e350c 100644
--- a/llvm/test/CodeGen/X86/lifetime-alias.ll
+++ b/llvm/test/CodeGen/X86/lifetime-alias.ll
@@ -140,10 +140,10 @@ declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture reado
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
 
-attributes #0 = { norecurse uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind }
-attributes #2 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nobuiltin nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #3 = { nobuiltin nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #4 = { nounwind }
 attributes #5 = { noreturn nounwind }
 attributes #6 = { builtin nounwind }
diff --git a/llvm/test/CodeGen/X86/limit-split-cost.mir b/llvm/test/CodeGen/X86/limit-split-cost.mir
index 5b8bb98..8e4e786 100644
--- a/llvm/test/CodeGen/X86/limit-split-cost.mir
+++ b/llvm/test/CodeGen/X86/limit-split-cost.mir
@@ -53,8 +53,8 @@
   ; Function Attrs: nounwind
   declare void @llvm.stackprotector(ptr, ptr) #2
   
-  attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-  attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+  attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
   attributes #2 = { nounwind }
   
   !llvm.module.flags = !{!0, !1}
diff --git a/llvm/test/CodeGen/X86/merge-vector-stores-scale-idx-crash.ll b/llvm/test/CodeGen/X86/merge-vector-stores-scale-idx-crash.ll
index 3dba5eb..206d453 100644
--- a/llvm/test/CodeGen/X86/merge-vector-stores-scale-idx-crash.ll
+++ b/llvm/test/CodeGen/X86/merge-vector-stores-scale-idx-crash.ll
@@ -41,5 +41,5 @@ if.end:                                           ; preds = %entry
 
 declare <4 x float> @_Z1bv() local_unnamed_addr 
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
diff --git a/llvm/test/CodeGen/X86/misched-copy.ll b/llvm/test/CodeGen/X86/misched-copy.ll
index fa6cd15..e3ceddf 100644
--- a/llvm/test/CodeGen/X86/misched-copy.ll
+++ b/llvm/test/CodeGen/X86/misched-copy.ll
@@ -42,7 +42,7 @@ end:
   ret i64 %add
 }
 
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
 
 !0 = !{!"float", !1}
 !1 = !{!"omnipotent char", !2}
diff --git a/llvm/test/CodeGen/X86/misched-matmul.ll b/llvm/test/CodeGen/X86/misched-matmul.ll
index a6c489d..9029167 100644
--- a/llvm/test/CodeGen/X86/misched-matmul.ll
+++ b/llvm/test/CodeGen/X86/misched-matmul.ll
@@ -222,4 +222,4 @@ entry:
   ret void
 }
 
-attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/movpc32-check.ll b/llvm/test/CodeGen/X86/movpc32-check.ll
index e3730d0..4585dcb 100644
--- a/llvm/test/CodeGen/X86/movpc32-check.ll
+++ b/llvm/test/CodeGen/X86/movpc32-check.ll
@@ -12,8 +12,8 @@ entry:
 
 declare void @bar(...) #1
 
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="i686" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="i686" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="i686" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="i686" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!7, !8, !9}
diff --git a/llvm/test/CodeGen/X86/ms-inline-asm-avx512.ll b/llvm/test/CodeGen/X86/ms-inline-asm-avx512.ll
index ec17b1d..04db25b 100644
--- a/llvm/test/CodeGen/X86/ms-inline-asm-avx512.ll
+++ b/llvm/test/CodeGen/X86/ms-inline-asm-avx512.ll
@@ -20,5 +20,5 @@ entry:
 ; CHECK: movq    %rax, 7(%rsp)
 ; CHECK: retq
 
-attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="skylake-avx512" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+rdrnd,+rdseed,+rtm,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="skylake-avx512" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+rdrnd,+rdseed,+rtm,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "use-soft-float"="false" }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/nocf_check.ll b/llvm/test/CodeGen/X86/nocf_check.ll
index 7b184ed..742b07d 100644
--- a/llvm/test/CodeGen/X86/nocf_check.ll
+++ b/llvm/test/CodeGen/X86/nocf_check.ll
@@ -66,8 +66,8 @@ bb2:
   ret void
 }
 
-attributes #0 = { nocf_check noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nocf_check noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-soft-float"="false" }
+attributes #1 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-soft-float"="false" }
 attributes #2 = { nocf_check }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/X86/pr15705.ll b/llvm/test/CodeGen/X86/pr15705.ll
index 3dd4aab..2de9a34 100644
--- a/llvm/test/CodeGen/X86/pr15705.ll
+++ b/llvm/test/CodeGen/X86/pr15705.ll
@@ -45,4 +45,4 @@ return:
   ret i32 %retval.0
 }
 
-attributes #0 = { nounwind readnone ssp "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone ssp "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/pr18846.ll b/llvm/test/CodeGen/X86/pr18846.ll
index 93a9a5d..4239f46 100644
--- a/llvm/test/CodeGen/X86/pr18846.ll
+++ b/llvm/test/CodeGen/X86/pr18846.ll
@@ -122,7 +122,7 @@ for.body65:                                       ; preds = %for.body29
 ; Function Attrs: nounwind
 declare void @llvm.x86.avx.storeu.ps.256(ptr, <8 x float>) #1
 
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind }
 
 !llvm.ident = !{!0}
diff --git a/llvm/test/CodeGen/X86/pr31045.ll b/llvm/test/CodeGen/X86/pr31045.ll
index 4aa73d7..78ba7cc 100644
--- a/llvm/test/CodeGen/X86/pr31045.ll
+++ b/llvm/test/CodeGen/X86/pr31045.ll
@@ -73,4 +73,4 @@ entry:
   ret void
 }
 
-attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/pr32610.ll b/llvm/test/CodeGen/X86/pr32610.ll
index dc11ba8..6f3602d 100644
--- a/llvm/test/CodeGen/X86/pr32610.ll
+++ b/llvm/test/CodeGen/X86/pr32610.ll
@@ -50,7 +50,7 @@ entry:
   ret void
 }
 
-attributes #0 = { norecurse nounwind optsize ssp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind optsize ssp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "use-soft-float"="false" }
 
 !llvm.ident = !{!0}
 
diff --git a/llvm/test/CodeGen/X86/pr34080-2.ll b/llvm/test/CodeGen/X86/pr34080-2.ll
index de34bfb1..279373a 100644
--- a/llvm/test/CodeGen/X86/pr34080-2.ll
+++ b/llvm/test/CodeGen/X86/pr34080-2.ll
@@ -132,4 +132,4 @@ define void @computeJD(ptr) nounwind {
   ret void
 }
 
-attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="i486" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="i486" "target-features"="+x87" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/pr34080.ll b/llvm/test/CodeGen/X86/pr34080.ll
index d07d1aa..3b46bd3 100644
--- a/llvm/test/CodeGen/X86/pr34080.ll
+++ b/llvm/test/CodeGen/X86/pr34080.ll
@@ -162,4 +162,4 @@ entry:
   ret void
 }
 
-attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/pr34629.ll b/llvm/test/CodeGen/X86/pr34629.ll
index eeb61d2..f7747b1 100644
--- a/llvm/test/CodeGen/X86/pr34629.ll
+++ b/llvm/test/CodeGen/X86/pr34629.ll
@@ -38,7 +38,7 @@ if.end:                                           ; preds = %entry, %if.then
   ret void
 }
 
-attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.module.flags = !{!0}
 !llvm.ident = !{!1}
diff --git a/llvm/test/CodeGen/X86/pr34634.ll b/llvm/test/CodeGen/X86/pr34634.ll
index a374112..980961a 100644
--- a/llvm/test/CodeGen/X86/pr34634.ll
+++ b/llvm/test/CodeGen/X86/pr34634.ll
@@ -54,7 +54,7 @@ entry:
   ret i32 0
 }
 
-attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.module.flags = !{!0}
 !llvm.ident = !{!1}
diff --git a/llvm/test/CodeGen/X86/pr42727.ll b/llvm/test/CodeGen/X86/pr42727.ll
index cf1fa5a..18e884b 100644
--- a/llvm/test/CodeGen/X86/pr42727.ll
+++ b/llvm/test/CodeGen/X86/pr42727.ll
@@ -29,5 +29,5 @@ entry:
   ret void
 }
 
-attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+avx,+avx2,+cx8,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+avx,+avx2,+cx8,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "use-soft-float"="false" }
 
diff --git a/llvm/test/CodeGen/X86/pr48064.mir b/llvm/test/CodeGen/X86/pr48064.mir
index eb74edd..d2eaea7 100644
--- a/llvm/test/CodeGen/X86/pr48064.mir
+++ b/llvm/test/CodeGen/X86/pr48064.mir
@@ -185,8 +185,8 @@
   ; Function Attrs: nounwind
   declare void @llvm.x86.seh.ehregnode(ptr) #7
 
-  attributes #0 = { noinline nounwind sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
-  attributes #1 = { norecurse sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { noinline nounwind sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-soft-float"="false" }
+  attributes #1 = { norecurse sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-soft-float"="false" }
   attributes #2 = { argmemonly nofree nosync nounwind willreturn }
   attributes #3 = { nofree }
   attributes #4 = { nofree nosync nounwind willreturn }
diff --git a/llvm/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll b/llvm/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
index 0d5d822..e497575 100644
--- a/llvm/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
@@ -178,4 +178,4 @@ bb439:                                            ; preds = %bb222, %bb85
   ret void
 }
 
-attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll
index dab7a6a..f8d28ae 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath.ll
@@ -1400,7 +1400,7 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
   ret <16 x float> %div
 }
 
-attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!divf,!vec-divf" }
-attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf,vec-divf" }
-attributes #2 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf:2,vec-divf:2" }
+attributes #0 = { "reciprocal-estimates"="!divf,!vec-divf" }
+attributes #1 = { "reciprocal-estimates"="divf,vec-divf" }
+attributes #2 = { "reciprocal-estimates"="divf:2,vec-divf:2" }
 
diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll
index 77ccaff..7fa13cb 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath2.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll
@@ -1841,8 +1841,8 @@ define <16 x float> @v16f32_no_step2(<16 x float> %x) #3 {
   ret <16 x float> %div
 }
 
-attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!divf,!vec-divf" }
-attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf,vec-divf" }
-attributes #2 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf:2,vec-divf:2" }
-attributes #3 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf:0,vec-divf:0" }
+attributes #0 = { "reciprocal-estimates"="!divf,!vec-divf" }
+attributes #1 = { "reciprocal-estimates"="divf,vec-divf" }
+attributes #2 = { "reciprocal-estimates"="divf:2,vec-divf:2" }
+attributes #3 = { "reciprocal-estimates"="divf:0,vec-divf:0" }
 
diff --git a/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll b/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll
index 50422a8..ea1ca51 100644
--- a/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll
+++ b/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll
@@ -72,7 +72,7 @@ if.end:                                           ; preds = %if.else, %if.then
   ret i32 %add
 }
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind }
 
 !llvm.module.flags = !{!0, !1}
diff --git a/llvm/test/CodeGen/X86/regparm.ll b/llvm/test/CodeGen/X86/regparm.ll
index 6d6802e..95009b5 100644
--- a/llvm/test/CodeGen/X86/regparm.ll
+++ b/llvm/test/CodeGen/X86/regparm.ll
@@ -38,7 +38,7 @@ entry:
 declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1) #1
 
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/X86/seh-catchpad.ll b/llvm/test/CodeGen/X86/seh-catchpad.ll
index cb85f39..85e465b 100644
--- a/llvm/test/CodeGen/X86/seh-catchpad.ll
+++ b/llvm/test/CodeGen/X86/seh-catchpad.ll
@@ -189,9 +189,9 @@ entry:
 ; Function Attrs: nounwind
 declare i32 @puts(ptr nocapture readonly) #3
 
-attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { noinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "use-soft-float"="false" }
+attributes #2 = { noinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "use-soft-float"="false" }
+attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "use-soft-float"="false" }
 attributes #4 = { noinline }
 attributes #5 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/seh-except-finally.ll b/llvm/test/CodeGen/X86/seh-except-finally.ll
index 539d776..fedb0c4 100644
--- a/llvm/test/CodeGen/X86/seh-except-finally.ll
+++ b/llvm/test/CodeGen/X86/seh-except-finally.ll
@@ -136,10 +136,10 @@ declare ptr @llvm.localaddress() #4
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.eh.typeid.for(ptr) #4
 
-attributes #0 = { noinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { noinline nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "use-soft-float"="false" }
+attributes #2 = { noinline nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "use-soft-float"="false" }
+attributes #3 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "use-soft-float"="false" }
 attributes #4 = { nounwind readnone }
 attributes #5 = { noinline }
 attributes #6 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/seh-no-invokes.ll b/llvm/test/CodeGen/X86/seh-no-invokes.ll
index 63e91d3..112031c 100644
--- a/llvm/test/CodeGen/X86/seh-no-invokes.ll
+++ b/llvm/test/CodeGen/X86/seh-no-invokes.ll
@@ -63,8 +63,8 @@ declare i32 @_except_handler3(...)
 ; Function Attrs: nounwind
 declare void @llvm.localescape(...) #3
 
-attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }
 
diff --git a/llvm/test/CodeGen/X86/shrinkwrap-hang.ll b/llvm/test/CodeGen/X86/shrinkwrap-hang.ll
index fe42d31..7e98b8a 100644
--- a/llvm/test/CodeGen/X86/shrinkwrap-hang.ll
+++ b/llvm/test/CodeGen/X86/shrinkwrap-hang.ll
@@ -29,4 +29,4 @@ if.end3:                                          ; preds = %if.end
   ret void
 }
 
-attributes #0 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index a260b32..83bfcd7 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -1012,15 +1012,15 @@ define double @sqrt_simplify_before_recip_order(double %x, ptr %p) nounwind {
   ret double %sqrt_fast
 }
 
-attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!sqrtf,!vec-sqrtf,!divf,!vec-divf" }
-attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" }
+attributes #0 = { "reciprocal-estimates"="!sqrtf,!vec-sqrtf,!divf,!vec-divf" }
+attributes #1 = { "reciprocal-estimates"="sqrt,vec-sqrt" }
 attributes #2 = { nounwind readnone }
-attributes #3 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,ieee" }
-attributes #4 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee,preserve-sign" }
-attributes #5 = { "unsafe-fp-math"="true" "reciprocal-estimates"="all:0" }
-attributes #6 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,dynamic" }
+attributes #3 = { "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,ieee" }
+attributes #4 = { "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee,preserve-sign" }
+attributes #5 = { "reciprocal-estimates"="all:0" }
+attributes #6 = { "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,dynamic" }
 
-; Attributes without "unsafe-fp-math"="true"
+; Attributes without
 ; TODO: Merge with previous attributes when this attribute can be deleted.
 attributes #7 = { "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,ieee" } ; #3
 attributes #8 = { "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,dynamic" } ; #6
diff --git a/llvm/test/CodeGen/X86/sse1.ll b/llvm/test/CodeGen/X86/sse1.ll
index 8ac86d1..5005752 100644
--- a/llvm/test/CodeGen/X86/sse1.ll
+++ b/llvm/test/CodeGen/X86/sse1.ll
@@ -251,5 +251,5 @@ define <2 x float> @PR31672() #0 {
 
 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #1
 
-attributes #0 = { nounwind "unsafe-fp-math"="true" }
+attributes #0 = { nounwind }
 
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
index 665a84a..d7c9438 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -1912,7 +1912,7 @@ define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
 }
 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
 
-define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
+define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxpd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1927,7 +1927,7 @@ define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
 }
 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
 
-define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
+define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxpd_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1941,7 +1941,7 @@ define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double>
   ret <2 x double> %2
 }
 
-define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 {
+define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxpd_ymm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -1956,7 +1956,7 @@ define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) #0
 }
 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
 
-define <4 x double> @stack_fold_maxpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) #1 {
+define <4 x double> @stack_fold_maxpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxpd_ymm_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -1970,7 +1970,7 @@ define <4 x double> @stack_fold_maxpd_ymm_commutable(<4 x double> %a0, <4 x doub
   ret <4 x double> %2
 }
 
-define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
+define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1985,7 +1985,7 @@ define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
 }
 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
 
-define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
+define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxps_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1999,7 +1999,7 @@ define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1
   ret <4 x float> %2
 }
 
-define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
+define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxps_ymm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -2014,7 +2014,7 @@ define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
 }
 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
 
-define <8 x float> @stack_fold_maxps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 {
+define <8 x float> @stack_fold_maxps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxps_ymm_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -2028,7 +2028,7 @@ define <8 x float> @stack_fold_maxps_ymm_commutable(<8 x float> %a0, <8 x float>
   ret <8 x float> %2
 }
 
-define double @stack_fold_maxsd(double %a0, double %a1) #0 {
+define double @stack_fold_maxsd(double %a0, double %a1) {
 ; CHECK-LABEL: stack_fold_maxsd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -2043,7 +2043,7 @@ define double @stack_fold_maxsd(double %a0, double %a1) #0 {
   ret double %3
 }
 
-define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 {
+define double @stack_fold_maxsd_commutable(double %a0, double %a1) {
 ; CHECK-LABEL: stack_fold_maxsd_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -2058,7 +2058,7 @@ define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 {
   ret double %3
 }
 
-define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0 {
+define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxsd_int:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -2073,7 +2073,7 @@ define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0
 }
 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
 
-define float @stack_fold_maxss(float %a0, float %a1) #0 {
+define float @stack_fold_maxss(float %a0, float %a1) {
 ; CHECK-LABEL: stack_fold_maxss:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -2088,7 +2088,7 @@ define float @stack_fold_maxss(float %a0, float %a1) #0 {
   ret float %3
 }
 
-define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 {
+define float @stack_fold_maxss_commutable(float %a0, float %a1) {
 ; CHECK-LABEL: stack_fold_maxss_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -2103,7 +2103,7 @@ define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 {
   ret float %3
 }
 
-define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 {
+define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxss_int:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -2118,7 +2118,7 @@ define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 {
 }
 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
 
-define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 {
+define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_minpd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -2133,7 +2133,7 @@ define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 {
 }
 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
 
-define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
+define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_minpd_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -2147,7 +2147,7 @@ define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double>
   ret <2 x double> %2
 }
 
-define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 {
+define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) {
 ; CHECK-LABEL: stack_fold_minpd_ymm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -2162,7 +2162,7 @@ define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) #0
 }
 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
 
-define <4 x double> @stack_fold_minpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) #1 {
+define <4 x double> @stack_fold_minpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) {
 ; CHECK-LABEL: stack_fold_minpd_ymm_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -2176,7 +2176,7 @@ define <4 x double> @stack_fold_minpd_ymm_commutable(<4 x double> %a0, <4 x doub
   ret <4 x double> %2
 }
 
-define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 {
+define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -2191,7 +2191,7 @@ define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 {
 }
 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
 
-define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
+define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minps_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -2205,7 +2205,7 @@ define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1
   ret <4 x float> %2
 }
 
-define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
+define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minps_ymm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -2220,7 +2220,7 @@ define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
 }
 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
 
-define <8 x float> @stack_fold_minps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 {
+define <8 x float> @stack_fold_minps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minps_ymm_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -2234,7 +2234,7 @@ define <8 x float> @stack_fold_minps_ymm_commutable(<8 x float> %a0, <8 x float>
   ret <8 x float> %2
 }
 
-define double @stack_fold_minsd(double %a0, double %a1) #0 {
+define double @stack_fold_minsd(double %a0, double %a1) {
 ; CHECK-LABEL: stack_fold_minsd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -2249,7 +2249,7 @@ define double @stack_fold_minsd(double %a0, double %a1) #0 {
   ret double %3
 }
 
-define double @stack_fold_minsd_commutable(double %a0, double %a1) #1 {
+define double @stack_fold_minsd_commutable(double %a0, double %a1) {
 ; CHECK-LABEL: stack_fold_minsd_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -2279,7 +2279,7 @@ define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) {
 }
 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
 
-define float @stack_fold_minss(float %a0, float %a1) #0 {
+define float @stack_fold_minss(float %a0, float %a1) {
 ; CHECK-LABEL: stack_fold_minss:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -2294,7 +2294,7 @@ define float @stack_fold_minss(float %a0, float %a1) #0 {
   ret float %3
 }
 
-define float @stack_fold_minss_commutable(float %a0, float %a1) #1 {
+define float @stack_fold_minss_commutable(float %a0, float %a1) {
 ; CHECK-LABEL: stack_fold_minss_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -2309,7 +2309,7 @@ define float @stack_fold_minss_commutable(float %a0, float %a1) #1 {
   ret float %3
 }
 
-define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) #0 {
+define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minss_int:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3632,6 +3632,3 @@ define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
   %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
   ret <8 x float> %6
 }
-
-attributes #0 = { "unsafe-fp-math"="false" }
-attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
index a75cdf9d..43743d5 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
@@ -609,7 +609,7 @@ define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
 }
 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
 
-define <8 x double> @stack_fold_maxpd_zmm(<8 x double> %a0, <8 x double> %a1) #0 {
+define <8 x double> @stack_fold_maxpd_zmm(<8 x double> %a0, <8 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxpd_zmm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -695,7 +695,7 @@ define <8 x double> @stack_fold_maxpd_zmm_commutable_kz(<8 x double> %a0, <8 x d
   ret <8 x double> %4
 }
 
-define <16 x float> @stack_fold_maxps_zmm(<16 x float> %a0, <16 x float> %a1) #0 {
+define <16 x float> @stack_fold_maxps_zmm(<16 x float> %a0, <16 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxps_zmm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -781,7 +781,7 @@ define <16 x float> @stack_fold_maxps_zmm_commutable_kz(<16 x float> %a0, <16 x
   ret <16 x float> %4
 }
 
-define <8 x double> @stack_fold_minpd_zmm(<8 x double> %a0, <8 x double> %a1) #0 {
+define <8 x double> @stack_fold_minpd_zmm(<8 x double> %a0, <8 x double> %a1) {
 ; CHECK-LABEL: stack_fold_minpd_zmm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -867,7 +867,7 @@ define <8 x double> @stack_fold_minpd_zmm_commutable_kz(<8 x double> %a0, <8 x d
   ret <8 x double> %4
 }
 
-define <16 x float> @stack_fold_minps_zmm(<16 x float> %a0, <16 x float> %a1) #0 {
+define <16 x float> @stack_fold_minps_zmm(<16 x float> %a0, <16 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minps_zmm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -1157,7 +1157,7 @@ define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
   ret <4 x float> %5
 }
 
-define <8 x double> @stack_fold_orpd_zmm(<8 x double> %a0, <8 x double> %a1) #0 {
+define <8 x double> @stack_fold_orpd_zmm(<8 x double> %a0, <8 x double> %a1) {
 ; CHECK-LABEL: stack_fold_orpd_zmm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -1178,7 +1178,7 @@ define <8 x double> @stack_fold_orpd_zmm(<8 x double> %a0, <8 x double> %a1) #0
   ret <8 x double> %6
 }
 
-define <16 x float> @stack_fold_orps_zmm(<16 x float> %a0, <16 x float> %a1) #0 {
+define <16 x float> @stack_fold_orps_zmm(<16 x float> %a0, <16 x float> %a1) {
 ; CHECK-LABEL: stack_fold_orps_zmm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -1414,7 +1414,7 @@ define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
   ret <4 x float> %5
 }
 
-define <8 x double> @stack_fold_xorpd_zmm(<8 x double> %a0, <8 x double> %a1) #0 {
+define <8 x double> @stack_fold_xorpd_zmm(<8 x double> %a0, <8 x double> %a1) {
 ; CHECK-LABEL: stack_fold_xorpd_zmm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -1435,7 +1435,7 @@ define <8 x double> @stack_fold_xorpd_zmm(<8 x double> %a0, <8 x double> %a1) #0
   ret <8 x double> %6
 }
 
-define <16 x float> @stack_fold_xorps_zmm(<16 x float> %a0, <16 x float> %a1) #0 {
+define <16 x float> @stack_fold_xorps_zmm(<16 x float> %a0, <16 x float> %a1) {
 ; CHECK-LABEL: stack_fold_xorps_zmm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -2058,5 +2058,4 @@ define <16 x float> @stack_fold_permilpsvar_zmm_maskz(<16 x float> %a0, <16 x i3
   ret <16 x float> %4
 }
 
-attributes #0 = { "unsafe-fp-math"="false" }
-attributes #1 = { "unsafe-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
+attributes #1 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll
index 52d4d8b..b715df8 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll
@@ -502,7 +502,7 @@ define <8 x half> @stack_fold_getmantsh_maskz(<8 x half> %a0, <8 x half> %a1, pt
   ret <8 x half> %3
 }
 
-define <32 x half> @stack_fold_maxph_zmm(<32 x half> %a0, <32 x half> %a1) #0 {
+define <32 x half> @stack_fold_maxph_zmm(<32 x half> %a0, <32 x half> %a1) {
 ; CHECK-LABEL: stack_fold_maxph_zmm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -517,7 +517,7 @@ define <32 x half> @stack_fold_maxph_zmm(<32 x half> %a0, <32 x half> %a1) #0 {
 }
 declare <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half>, <32 x half>, i32) nounwind readnone
 
-define <32 x half> @stack_fold_maxph_zmm_commuted(<32 x half> %a0, <32 x half> %a1) #0 {
+define <32 x half> @stack_fold_maxph_zmm_commuted(<32 x half> %a0, <32 x half> %a1) {
 ; CHECK-LABEL: stack_fold_maxph_zmm_commuted:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -532,7 +532,7 @@ define <32 x half> @stack_fold_maxph_zmm_commuted(<32 x half> %a0, <32 x half> %
   ret <32 x half> %2
 }
 
-define <32 x half> @stack_fold_maxph_zmm_k(<32 x half> %a0, <32 x half> %a1, i32 %mask, ptr %passthru) #0 {
+define <32 x half> @stack_fold_maxph_zmm_k(<32 x half> %a0, <32 x half> %a1, i32 %mask, ptr %passthru) {
 ; CHECK-LABEL: stack_fold_maxph_zmm_k:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -552,7 +552,7 @@ define <32 x half> @stack_fold_maxph_zmm_k(<32 x half> %a0, <32 x half> %a1, i32
   ret <32 x half> %5
 }
 
-define <32 x half> @stack_fold_maxph_zmm_k_commuted(<32 x half> %a0, <32 x half> %a1, i32 %mask, ptr %passthru) #0 {
+define <32 x half> @stack_fold_maxph_zmm_k_commuted(<32 x half> %a0, <32 x half> %a1, i32 %mask, ptr %passthru) {
 ; CHECK-LABEL: stack_fold_maxph_zmm_k_commuted:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -573,7 +573,7 @@ define <32 x half> @stack_fold_maxph_zmm_k_commuted(<32 x half> %a0, <32 x half>
   ret <32 x half> %5
 }
 
-define <32 x half> @stack_fold_maxph_zmm_kz(<32 x half> %a0, <32 x half> %a1, i32 %mask) #0 {
+define <32 x half> @stack_fold_maxph_zmm_kz(<32 x half> %a0, <32 x half> %a1, i32 %mask) {
 ; CHECK-LABEL: stack_fold_maxph_zmm_kz:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -590,7 +590,7 @@ define <32 x half> @stack_fold_maxph_zmm_kz(<32 x half> %a0, <32 x half> %a1, i3
   ret <32 x half> %4
 }
 
-define <32 x half> @stack_fold_maxph_zmm_kz_commuted(<32 x half> %a0, <32 x half> %a1, i32 %mask) #0 {
+define <32 x half> @stack_fold_maxph_zmm_kz_commuted(<32 x half> %a0, <32 x half> %a1, i32 %mask) {
 ; CHECK-LABEL: stack_fold_maxph_zmm_kz_commuted:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -710,7 +710,7 @@ define <32 x half> @stack_fold_maxph_zmm_commutable_kz_commuted(<32 x half> %a0,
   ret <32 x half> %4
 }
 
-define half @stack_fold_maxsh(half %a0, half %a1) #0 {
+define half @stack_fold_maxsh(half %a0, half %a1) {
 ; CHECK-LABEL: stack_fold_maxsh:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovsh %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -725,7 +725,7 @@ define half @stack_fold_maxsh(half %a0, half %a1) #0 {
   ret half %3
 }
 
-define half @stack_fold_maxsh_commuted(half %a0, half %a1) #0 {
+define half @stack_fold_maxsh_commuted(half %a0, half %a1) {
 ; CHECK-LABEL: stack_fold_maxsh_commuted:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovsh %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -772,7 +772,7 @@ define half @stack_fold_maxsh_commutable_commuted(half %a0, half %a1) #1 {
   ret half %3
 }
 
-define <8 x half> @stack_fold_maxsh_int(<8 x half> %a0, <8 x half> %a1) #0 {
+define <8 x half> @stack_fold_maxsh_int(<8 x half> %a0, <8 x half> %a1) {
 ; CHECK-LABEL: stack_fold_maxsh_int:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -820,7 +820,7 @@ define <8 x half> @stack_fold_maxsh_maskz(<8 x half> %a0, <8 x half> %a1, i8 %ma
   ret <8 x half> %2
 }
 
-define <32 x half> @stack_fold_minph_zmm(<32 x half> %a0, <32 x half> %a1) #0 {
+define <32 x half> @stack_fold_minph_zmm(<32 x half> %a0, <32 x half> %a1) {
 ; CHECK-LABEL: stack_fold_minph_zmm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -835,7 +835,7 @@ define <32 x half> @stack_fold_minph_zmm(<32 x half> %a0, <32 x half> %a1) #0 {
 }
 declare <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half>, <32 x half>, i32) nounwind readnone
 
-define <32 x half> @stack_fold_minph_zmm_commuted(<32 x half> %a0, <32 x half> %a1) #0 {
+define <32 x half> @stack_fold_minph_zmm_commuted(<32 x half> %a0, <32 x half> %a1) {
 ; CHECK-LABEL: stack_fold_minph_zmm_commuted:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -850,7 +850,7 @@ define <32 x half> @stack_fold_minph_zmm_commuted(<32 x half> %a0, <32 x half> %
   ret <32 x half> %2
 }
 
-define <32 x half> @stack_fold_minph_zmm_k(<32 x half> %a0, <32 x half> %a1, i32 %mask, ptr %passthru) #0 {
+define <32 x half> @stack_fold_minph_zmm_k(<32 x half> %a0, <32 x half> %a1, i32 %mask, ptr %passthru) {
 ; CHECK-LABEL: stack_fold_minph_zmm_k:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -870,7 +870,7 @@ define <32 x half> @stack_fold_minph_zmm_k(<32 x half> %a0, <32 x half> %a1, i32
   ret <32 x half> %5
 }
 
-define <32 x half> @stack_fold_minph_zmm_k_commuted(<32 x half> %a0, <32 x half> %a1, i32 %mask, ptr %passthru) #0 {
+define <32 x half> @stack_fold_minph_zmm_k_commuted(<32 x half> %a0, <32 x half> %a1, i32 %mask, ptr %passthru) {
 ; CHECK-LABEL: stack_fold_minph_zmm_k_commuted:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -891,7 +891,7 @@ define <32 x half> @stack_fold_minph_zmm_k_commuted(<32 x half> %a0, <32 x half>
   ret <32 x half> %5
 }
 
-define <32 x half> @stack_fold_minph_zmm_kz(<32 x half> %a0, <32 x half> %a1, i32 %mask) #0 {
+define <32 x half> @stack_fold_minph_zmm_kz(<32 x half> %a0, <32 x half> %a1, i32 %mask) {
 ; CHECK-LABEL: stack_fold_minph_zmm_kz:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -908,7 +908,7 @@ define <32 x half> @stack_fold_minph_zmm_kz(<32 x half> %a0, <32 x half> %a1, i3
   ret <32 x half> %4
 }
 
-define <32 x half> @stack_fold_minph_zmm_kz_commuted(<32 x half> %a0, <32 x half> %a1, i32 %mask) #0 {
+define <32 x half> @stack_fold_minph_zmm_kz_commuted(<32 x half> %a0, <32 x half> %a1, i32 %mask) {
 ; CHECK-LABEL: stack_fold_minph_zmm_kz_commuted:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
@@ -1028,7 +1028,7 @@ define <32 x half> @stack_fold_minph_zmm_commutable_kz_commuted(<32 x half> %a0,
   ret <32 x half> %4
 }
 
-define half @stack_fold_minsh(half %a0, half %a1) #0 {
+define half @stack_fold_minsh(half %a0, half %a1) {
 ; CHECK-LABEL: stack_fold_minsh:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovsh %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -1043,7 +1043,7 @@ define half @stack_fold_minsh(half %a0, half %a1) #0 {
   ret half %3
 }
 
-define half @stack_fold_minsh_commuted(half %a0, half %a1) #0 {
+define half @stack_fold_minsh_commuted(half %a0, half %a1) {
 ; CHECK-LABEL: stack_fold_minsh_commuted:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovsh %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -1090,7 +1090,7 @@ define half @stack_fold_minsh_commutable_commuted(half %a0, half %a1) #1 {
   ret half %3
 }
 
-define <8 x half> @stack_fold_minsh_int(<8 x half> %a0, <8 x half> %a1) #0 {
+define <8 x half> @stack_fold_minsh_int(<8 x half> %a0, <8 x half> %a1) {
 ; CHECK-LABEL: stack_fold_minsh_int:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -2316,5 +2316,4 @@ define <4 x float> @stack_fold_fcmaddcsh_maskz(<4 x float> %a0, <4 x float> %a1,
 }
 declare <4 x float> @llvm.x86.avx512fp16.maskz.vfcmadd.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32)
 
-attributes #0 = { "unsafe-fp-math"="false" }
-attributes #1 = { "unsafe-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
+attributes #1 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
index 4fed6bc..cd06f2d 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
@@ -381,7 +381,7 @@ define <16 x half> @stack_fold_getmantph_maskz_ymm(<16 x half> %a0, ptr %mask) {
   ret <16 x half> %3
 }
 
-define <8 x half> @stack_fold_maxph(<8 x half> %a0, <8 x half> %a1) #0 {
+define <8 x half> @stack_fold_maxph(<8 x half> %a0, <8 x half> %a1) {
 ; CHECK-LABEL: stack_fold_maxph:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -396,7 +396,7 @@ define <8 x half> @stack_fold_maxph(<8 x half> %a0, <8 x half> %a1) #0 {
 }
 declare <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half>, <8 x half>) nounwind readnone
 
-define <8 x half> @stack_fold_maxph_commutable(<8 x half> %a0, <8 x half> %a1) #1 {
+define <8 x half> @stack_fold_maxph_commutable(<8 x half> %a0, <8 x half> %a1) {
 ; CHECK-LABEL: stack_fold_maxph_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -410,7 +410,7 @@ define <8 x half> @stack_fold_maxph_commutable(<8 x half> %a0, <8 x half> %a1) #
   ret <8 x half> %2
 }
 
-define <16 x half> @stack_fold_maxph_ymm(<16 x half> %a0, <16 x half> %a1) #0 {
+define <16 x half> @stack_fold_maxph_ymm(<16 x half> %a0, <16 x half> %a1) {
 ; CHECK-LABEL: stack_fold_maxph_ymm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -425,7 +425,7 @@ define <16 x half> @stack_fold_maxph_ymm(<16 x half> %a0, <16 x half> %a1) #0 {
 }
 declare <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half>, <16 x half>) nounwind readnone
 
-define <16 x half> @stack_fold_maxph_ymm_commutable(<16 x half> %a0, <16 x half> %a1) #1 {
+define <16 x half> @stack_fold_maxph_ymm_commutable(<16 x half> %a0, <16 x half> %a1) {
 ; CHECK-LABEL: stack_fold_maxph_ymm_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -439,7 +439,7 @@ define <16 x half> @stack_fold_maxph_ymm_commutable(<16 x half> %a0, <16 x half>
   ret <16 x half> %2
 }
 
-define <8 x half> @stack_fold_minph(<8 x half> %a0, <8 x half> %a1) #0 {
+define <8 x half> @stack_fold_minph(<8 x half> %a0, <8 x half> %a1) {
 ; CHECK-LABEL: stack_fold_minph:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -454,7 +454,7 @@ define <8 x half> @stack_fold_minph(<8 x half> %a0, <8 x half> %a1) #0 {
 }
 declare <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half>, <8 x half>) nounwind readnone
 
-define <8 x half> @stack_fold_minph_commutable(<8 x half> %a0, <8 x half> %a1) #1 {
+define <8 x half> @stack_fold_minph_commutable(<8 x half> %a0, <8 x half> %a1) {
 ; CHECK-LABEL: stack_fold_minph_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -468,7 +468,7 @@ define <8 x half> @stack_fold_minph_commutable(<8 x half> %a0, <8 x half> %a1) #
   ret <8 x half> %2
 }
 
-define <16 x half> @stack_fold_minph_ymm(<16 x half> %a0, <16 x half> %a1) #0 {
+define <16 x half> @stack_fold_minph_ymm(<16 x half> %a0, <16 x half> %a1) {
 ; CHECK-LABEL: stack_fold_minph_ymm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -483,7 +483,7 @@ define <16 x half> @stack_fold_minph_ymm(<16 x half> %a0, <16 x half> %a1) #0 {
 }
 declare <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half>, <16 x half>) nounwind readnone
 
-define <16 x half> @stack_fold_minph_ymm_commutable(<16 x half> %a0, <16 x half> %a1) #1 {
+define <16 x half> @stack_fold_minph_ymm_commutable(<16 x half> %a0, <16 x half> %a1) {
 ; CHECK-LABEL: stack_fold_minph_ymm_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -1471,6 +1471,3 @@ define <8 x float> @stack_fold_fcmaddc_maskz_ymm(<8 x float> %a0, <8 x float> %a
   ret <8 x float> %3
 }
 declare <8 x float> @llvm.x86.avx512fp16.maskz.vfcmadd.cph.256(<8 x float>, <8 x float>, <8 x float>, i8)
-
-attributes #0 = { "unsafe-fp-math"="false" }
-attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
index b370a80..bd56e61 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
@@ -457,7 +457,7 @@ define <4 x float> @stack_fold_cvtpd2ps_ymm(<4 x double> %a0) {
   ret <4 x float> %2
 }
 
-define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
+define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxpd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -472,7 +472,7 @@ define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
 }
 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
 
-define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
+define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxpd_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -486,7 +486,7 @@ define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double>
   ret <2 x double> %2
 }
 
-define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 {
+define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxpd_ymm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -501,7 +501,7 @@ define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) #0
 }
 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
 
-define <4 x double> @stack_fold_maxpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) #1 {
+define <4 x double> @stack_fold_maxpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxpd_ymm_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -515,7 +515,7 @@ define <4 x double> @stack_fold_maxpd_ymm_commutable(<4 x double> %a0, <4 x doub
   ret <4 x double> %2
 }
 
-define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
+define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -530,7 +530,7 @@ define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
 }
 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
 
-define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
+define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxps_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -544,7 +544,7 @@ define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1
   ret <4 x float> %2
 }
 
-define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
+define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxps_ymm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -559,7 +559,7 @@ define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
 }
 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
 
-define <8 x float> @stack_fold_maxps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 {
+define <8 x float> @stack_fold_maxps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxps_ymm_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -573,7 +573,7 @@ define <8 x float> @stack_fold_maxps_ymm_commutable(<8 x float> %a0, <8 x float>
   ret <8 x float> %2
 }
 
-define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 {
+define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -588,7 +588,7 @@ define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 {
 }
 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
 
-define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
+define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minps_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -602,7 +602,7 @@ define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1
   ret <4 x float> %2
 }
 
-define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
+define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minps_ymm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -617,7 +617,7 @@ define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
 }
 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
 
-define <8 x float> @stack_fold_minps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 {
+define <8 x float> @stack_fold_minps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minps_ymm_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -687,7 +687,7 @@ define <8 x float> @stack_fold_mulps_ymm(<8 x float> %a0, <8 x float> %a1) {
   ret <8 x float> %2
 }
 
-define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) #0 {
+define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_orpd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -708,7 +708,7 @@ define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) #0 {
   ret <2 x double> %6
 }
 
-define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 {
+define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) {
 ; CHECK-LABEL: stack_fold_orpd_ymm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -939,7 +939,7 @@ define <8 x float> @stack_fold_subps_ymm(<8 x float> %a0, <8 x float> %a1) {
   ret <8 x float> %2
 }
 
-define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) #0 {
+define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_xorpd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -960,7 +960,7 @@ define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) #0 {
   ret <2 x double> %6
 }
 
-define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 {
+define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) {
 ; CHECK-LABEL: stack_fold_xorpd_ymm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -1391,6 +1391,3 @@ declare <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float>, <4 x i32>, <
 declare <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>)
 declare <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>)
 declare <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>)
-
-attributes #0 = { "unsafe-fp-math"="false" }
-attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll b/llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll
index 306ee31..9bc9a9c 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll
@@ -1424,7 +1424,7 @@ define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
 }
 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
 
-define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
+define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxpd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1439,7 +1439,7 @@ define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
 }
 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
 
-define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
+define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxpd_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1453,7 +1453,7 @@ define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double>
   ret <2 x double> %2
 }
 
-define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
+define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1468,7 +1468,7 @@ define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
 }
 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
 
-define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
+define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxps_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1482,7 +1482,7 @@ define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1
   ret <4 x float> %2
 }
 
-define double @stack_fold_maxsd(double %a0, double %a1) #0 {
+define double @stack_fold_maxsd(double %a0, double %a1) {
 ; CHECK-LABEL: stack_fold_maxsd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -1497,7 +1497,7 @@ define double @stack_fold_maxsd(double %a0, double %a1) #0 {
   ret double %3
 }
 
-define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 {
+define double @stack_fold_maxsd_commutable(double %a0, double %a1) {
 ; CHECK-LABEL: stack_fold_maxsd_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -1512,7 +1512,7 @@ define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 {
   ret double %3
 }
 
-define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0 {
+define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_maxsd_int:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1527,7 +1527,7 @@ define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0
 }
 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
 
-define float @stack_fold_maxss(float %a0, float %a1) #0 {
+define float @stack_fold_maxss(float %a0, float %a1) {
 ; CHECK-LABEL: stack_fold_maxss:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -1542,7 +1542,7 @@ define float @stack_fold_maxss(float %a0, float %a1) #0 {
   ret float %3
 }
 
-define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 {
+define float @stack_fold_maxss_commutable(float %a0, float %a1) {
 ; CHECK-LABEL: stack_fold_maxss_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -1557,7 +1557,7 @@ define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 {
   ret float %3
 }
 
-define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 {
+define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_maxss_int:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1572,7 +1572,7 @@ define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 {
 }
 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
 
-define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 {
+define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_minpd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1587,7 +1587,7 @@ define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 {
 }
 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
 
-define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
+define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_minpd_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1601,7 +1601,7 @@ define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double>
   ret <2 x double> %2
 }
 
-define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 {
+define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1616,7 +1616,7 @@ define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 {
 }
 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
 
-define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
+define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minps_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1630,7 +1630,7 @@ define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1
   ret <4 x float> %2
 }
 
-define double @stack_fold_minsd(double %a0, double %a1) #0 {
+define double @stack_fold_minsd(double %a0, double %a1) {
 ; CHECK-LABEL: stack_fold_minsd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -1645,7 +1645,7 @@ define double @stack_fold_minsd(double %a0, double %a1) #0 {
   ret double %3
 }
 
-define double @stack_fold_minsd_commutable(double %a0, double %a1) #1 {
+define double @stack_fold_minsd_commutable(double %a0, double %a1) {
 ; CHECK-LABEL: stack_fold_minsd_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -1660,7 +1660,7 @@ define double @stack_fold_minsd_commutable(double %a0, double %a1) #1 {
   ret double %3
 }
 
-define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) #0 {
+define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: stack_fold_minsd_int:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1675,7 +1675,7 @@ define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) #0
 }
 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
 
-define float @stack_fold_minss(float %a0, float %a1) #0 {
+define float @stack_fold_minss(float %a0, float %a1) {
 ; CHECK-LABEL: stack_fold_minss:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -1690,7 +1690,7 @@ define float @stack_fold_minss(float %a0, float %a1) #0 {
   ret float %3
 }
 
-define float @stack_fold_minss_commutable(float %a0, float %a1) #1 {
+define float @stack_fold_minss_commutable(float %a0, float %a1) {
 ; CHECK-LABEL: stack_fold_minss_commutable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -1705,7 +1705,7 @@ define float @stack_fold_minss_commutable(float %a0, float %a1) #1 {
   ret float %3
 }
 
-define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) #0 {
+define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: stack_fold_minss_int:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -2490,6 +2490,3 @@ define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
 
 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
-
-attributes #0 = { "unsafe-fp-math"="false" }
-attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/X86/stack-protector-3.ll b/llvm/test/CodeGen/X86/stack-protector-3.ll
index 59784af..8ca6a56 100644
--- a/llvm/test/CodeGen/X86/stack-protector-3.ll
+++ b/llvm/test/CodeGen/X86/stack-protector-3.ll
@@ -118,7 +118,7 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
 ; Function Attrs: argmemonly nounwind willreturn
 declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1
 
-attributes #0 = { nounwind sspreq uwtable writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind sspreq uwtable writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind willreturn }
 attributes #2 = { nounwind }
 
diff --git a/llvm/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll b/llvm/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll
index 63390e4..4bc91bf 100644
--- a/llvm/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll
+++ b/llvm/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll
@@ -55,6 +55,6 @@ entry:
 
 declare void @f(i32) #1
 
-attributes #0 = { nounwind sspreq "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind sspreq "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/stack_guard_remat.ll b/llvm/test/CodeGen/X86/stack_guard_remat.ll
index f7a602c..f53fa0b4 100644
--- a/llvm/test/CodeGen/X86/stack_guard_remat.ll
+++ b/llvm/test/CodeGen/X86/stack_guard_remat.ll
@@ -23,4 +23,4 @@ declare void @foo3(ptr)
 ; Function Attrs: nounwind
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
 
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/tail-merge-wineh.ll b/llvm/test/CodeGen/X86/tail-merge-wineh.ll
index 00bddc1..a208368 100644
--- a/llvm/test/CodeGen/X86/tail-merge-wineh.ll
+++ b/llvm/test/CodeGen/X86/tail-merge-wineh.ll
@@ -101,5 +101,5 @@ declare x86_stdcallcc void @_CxxThrowException(ptr, ptr)
 
 declare i32 @__CxxFrameHandler3(...)
 
-attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { noreturn }
diff --git a/llvm/test/CodeGen/X86/tls-shrink-wrapping.ll b/llvm/test/CodeGen/X86/tls-shrink-wrapping.ll
index 2749ebd..f0d5a16 100644
--- a/llvm/test/CodeGen/X86/tls-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/tls-shrink-wrapping.ll
@@ -51,6 +51,6 @@ if.end:                                           ; preds = %if.then, %entry
 
 declare void @f(...) #1
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/unused_stackslots.ll b/llvm/test/CodeGen/X86/unused_stackslots.ll
index d909dd4..4d390bd 100644
--- a/llvm/test/CodeGen/X86/unused_stackslots.ll
+++ b/llvm/test/CodeGen/X86/unused_stackslots.ll
@@ -215,8 +215,8 @@ declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64,
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #3 = { nounwind }
 
diff --git a/llvm/test/CodeGen/X86/uwtables.ll b/llvm/test/CodeGen/X86/uwtables.ll
index 1e2e1d9..68a5ff1 100644
--- a/llvm/test/CodeGen/X86/uwtables.ll
+++ b/llvm/test/CodeGen/X86/uwtables.ll
@@ -38,5 +38,5 @@ declare i32 @__gxx_personality_v0(...)
 declare void @__cxa_call_unexpected(ptr) local_unnamed_addr
 
 
-attributes #0 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 910dd1e..5954e34 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -5435,7 +5435,7 @@ define double @extract3_uitofp_v4i32_f64(<4 x i32> %x) nounwind {
   ret double %r
 }
 
-define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
+define void @PR43609(ptr nocapture %x, <2 x i64> %y) {
 ; SSE2-LABEL: PR43609:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2,2]
@@ -5643,6 +5643,3 @@ define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
   store <2 x double> %t23, ptr %t26, align 8
   ret void
 }
-
-attributes #0 = { "unsafe-fp-math"="true" }
-
diff --git a/llvm/test/CodeGen/X86/vector-sqrt.ll b/llvm/test/CodeGen/X86/vector-sqrt.ll
index b08784a..843f099a 100644
--- a/llvm/test/CodeGen/X86/vector-sqrt.ll
+++ b/llvm/test/CodeGen/X86/vector-sqrt.ll
@@ -63,6 +63,6 @@ entry:
 ; Function Attrs: nounwind readnone
 declare float @sqrtf(float) local_unnamed_addr #1
 
-attributes #0 = { nounwind readonly uwtable "target-features"="+avx" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone  "target-features"="+avx2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readonly uwtable "target-features"="+avx" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone  "target-features"="+avx2" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/X86/vector-width-store-merge.ll b/llvm/test/CodeGen/X86/vector-width-store-merge.ll
index 50c7b01..9363348 100644
--- a/llvm/test/CodeGen/X86/vector-width-store-merge.ll
+++ b/llvm/test/CodeGen/X86/vector-width-store-merge.ll
@@ -85,8 +85,8 @@ entry:
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1 immarg) #1
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "prefer-vector-width"="128" "stack-protector-buffer-size"="8" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "prefer-vector-width"="128" "stack-protector-buffer-size"="8" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind }
-attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "prefer-vector-width"="256" "stack-protector-buffer-size"="8" "target-cpu"="skylake-avx512" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "prefer-vector-width"="256" "stack-protector-buffer-size"="8" "target-cpu"="skylake-avx512" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "use-soft-float"="false" }
 
 !0 = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/CodeGen/X86/win-cleanuppad.ll b/llvm/test/CodeGen/X86/win-cleanuppad.ll
index e9265a1..59dcccc 100644
--- a/llvm/test/CodeGen/X86/win-cleanuppad.ll
+++ b/llvm/test/CodeGen/X86/win-cleanuppad.ll
@@ -194,6 +194,6 @@ cleanup.outer:                                      ; preds = %invoke.cont.1, %c
 ; X64-NEXT: .long   .Ltmp7@IMGREL
 ; X64-NEXT: .long   -1
 
-attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/win32-seh-catchpad.ll b/llvm/test/CodeGen/X86/win32-seh-catchpad.ll
index 832ddca..0f51866 100644
--- a/llvm/test/CodeGen/X86/win32-seh-catchpad.ll
+++ b/llvm/test/CodeGen/X86/win32-seh-catchpad.ll
@@ -220,7 +220,7 @@ declare i32 @_except_handler3(...)
 ; Function Attrs: nounwind
 declare void @llvm.localescape(...) #2
 
-attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
 attributes #3 = { noinline }
diff --git a/llvm/test/CodeGen/X86/win32-seh-nested-finally.ll b/llvm/test/CodeGen/X86/win32-seh-nested-finally.ll
index 5b1f9b3..5095460 100644
--- a/llvm/test/CodeGen/X86/win32-seh-nested-finally.ll
+++ b/llvm/test/CodeGen/X86/win32-seh-nested-finally.ll
@@ -34,8 +34,8 @@ declare void @f(i32) #0
 
 declare i32 @_except_handler3(...)
 
-attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { noinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { noinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { noinline }
 
diff --git a/llvm/test/CodeGen/X86/windows-seh-EHa-CppCatchDotDotDot.ll b/llvm/test/CodeGen/X86/windows-seh-EHa-CppCatchDotDotDot.ll
index 785c260..d4d4fe3 100644
--- a/llvm/test/CodeGen/X86/windows-seh-EHa-CppCatchDotDotDot.ll
+++ b/llvm/test/CodeGen/X86/windows-seh-EHa-CppCatchDotDotDot.ll
@@ -272,12 +272,12 @@ declare dso_local void @llvm.seh.try.end() #2
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.eh.exceptioncode(token) #3
 
-attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind willreturn }
 attributes #3 = { nounwind readnone }
-attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #5 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #5 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #6 = { nounwind }
 attributes #7 = { noreturn }
 attributes #8 = { noinline }
diff --git a/llvm/test/CodeGen/X86/windows-seh-EHa-CppDtors01.ll b/llvm/test/CodeGen/X86/windows-seh-EHa-CppDtors01.ll
index 6c6e9c3..b0baaac 100644
--- a/llvm/test/CodeGen/X86/windows-seh-EHa-CppDtors01.ll
+++ b/llvm/test/CodeGen/X86/windows-seh-EHa-CppDtors01.ll
@@ -240,12 +240,12 @@ declare i32 @llvm.eh.exceptioncode(token) #1
 
 declare dso_local void @"?printf@@YAXZZ"(...) #5
 
-attributes #0 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #3 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #4 = { nounwind willreturn }
-attributes #5 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #5 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #6 = { nounwind }
 attributes #7 = { noinline }
 
diff --git a/llvm/test/CodeGen/X86/windows-seh-EHa-TryInFinally.ll b/llvm/test/CodeGen/X86/windows-seh-EHa-TryInFinally.ll
index 9e44299..d3da5f8 100644
--- a/llvm/test/CodeGen/X86/windows-seh-EHa-TryInFinally.ll
+++ b/llvm/test/CodeGen/X86/windows-seh-EHa-TryInFinally.ll
@@ -209,10 +209,10 @@ declare i32 @llvm.eh.exceptioncode(token) #4
 ; Function Attrs: nounwind
 declare void @llvm.localescape(...) #5
 
-attributes #0 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind willreturn }
-attributes #3 = { noinline "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { noinline "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #4 = { nounwind readnone }
 attributes #5 = { nounwind }
 attributes #6 = { noinline }
diff --git a/llvm/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll b/llvm/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
index cb12481..9f888f8 100644
--- a/llvm/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
+++ b/llvm/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
@@ -24,7 +24,7 @@ entry:
   ret i64 %or
 }
 
-attributes #0 = { minsize nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { minsize nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 
 ; clang -Os -c test2.cpp -emit-llvm -S
@@ -63,7 +63,7 @@ entry:
   ret i64 %or
 }
 
-attributes #1 = { nounwind optsize readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind optsize readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 ; clang -O2 -c test2.cpp -emit-llvm -S
 ; Verify that we do not generate shld insruction when we are not optimizing
@@ -89,7 +89,7 @@ entry:
   ret i64 %or
 }
 
-attributes #2= { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2= { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"ProfileSummary", !1}
diff --git a/llvm/test/MC/Disassembler/Xtensa/debug.txt b/llvm/test/MC/Disassembler/Xtensa/debug.txt
index 1321f09..5438760 100644
--- a/llvm/test/MC/Disassembler/Xtensa/debug.txt
+++ b/llvm/test/MC/Disassembler/Xtensa/debug.txt
@@ -9,7 +9,7 @@
 # CHECK-DEBUG: break 1, 1
 # CHECK-CORE: [[#@LINE-2]]:2: warning: invalid instruction encoding
 
-[0x2c,0xf1]
+[0x2d,0xf1]
 # CHECK-DEBUG: break.n 1
 # CHECK-CORE: [[#@LINE-2]]:2: warning: invalid instruction encoding
 
diff --git a/llvm/test/MC/Xtensa/debug.s b/llvm/test/MC/Xtensa/debug.s
index 36b1f11..4ca6368 100644
--- a/llvm/test/MC/Xtensa/debug.s
+++ b/llvm/test/MC/Xtensa/debug.s
@@ -11,7 +11,7 @@ break 1, 1
 
 # Instruction format RRRN
 # CHECK-INST: break.n 1
-# CHECK: encoding: [0x2c,0xf1]
+# CHECK: encoding: [0x2d,0xf1]
 break.n 1
 
 # Instruction format RRR
diff --git a/llvm/test/MachineVerifier/test_g_shuffle_vector.mir b/llvm/test/MachineVerifier/test_g_shuffle_vector.mir
index 6aba6731..c4ca2d2 100644
--- a/llvm/test/MachineVerifier/test_g_shuffle_vector.mir
+++ b/llvm/test/MachineVerifier/test_g_shuffle_vector.mir
@@ -50,10 +50,16 @@ body:             |
     %20:_(<2 x s32>) = G_SHUFFLE_VECTOR %3, %19, shufflemask(1, 0)
 
     ; CHECK: Bad machine code: G_SHUFFLE_VECTOR cannot change element type
-    %21:_(s16) = G_SHUFFLE_VECTOR %3, %4, shufflemask(0)
+    %21:_(s16) = G_SHUFFLE_VECTOR %3, %4, shufflemask(0, 1)
+
+    ; CHECK: Bad machine code: G_SHUFFLE_VECTOR must have vector src
+    %22:_(<2 x s32>) = G_SHUFFLE_VECTOR %3, %4, shufflemask(0, 0)
+
+    %23:_(p0) = G_IMPLICIT_DEF
+    ; CHECK: Bad machine code: G_SHUFFLE_VECTOR must have vector src
+    %24:_(<2 x p0>) = G_SHUFFLE_VECTOR %23, %23, shufflemask(0, 0)
 
     ; CHECK: Bad machine code: Out of bounds shuffle index
-    %22:_(s32) = G_IMPLICIT_DEF
-    %20:_(<2 x s32>) = G_SHUFFLE_VECTOR %22, %22, shufflemask(0, 2)
+    %26:_(<2 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 7)
 
 ...
diff --git a/llvm/test/Transforms/InstCombine/constant-vector-insert.ll b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
new file mode 100644
index 0000000..2688540
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
@@ -0,0 +1,156 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=instcombine %s | FileCheck %s
+; RUN: opt -S -passes=instcombine %s \
+; RUN:   -use-constant-int-for-fixed-length-splat \
+; RUN    -use-constant-fp-for-fixed-length-splat \
+; RUN:   -use-constant-int-for-scalable-splat \
+; RUN:   -use-constant-fp-for-scalable-splat | FileCheck %s
+
+define <vscale x 4 x i32> @insert_div() {
+; CHECK-LABEL: @insert_div(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 3), i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
+  %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
+  ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 4 x i32> @insert_div_splat_lhs() {
+; CHECK-LABEL: @insert_div_splat_lhs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 5), <4 x i32> splat (i32 2), i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 2), <4 x i32> splat (i32 5), i64 0)
+  %div = udiv <vscale x 4 x i32> splat (i32 10), %0
+  ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 4 x i32> @insert_div_mixed_splat() {
+; CHECK-LABEL: @insert_div_mixed_splat(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 6), <4 x i32> splat (i32 3), i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 18), <4 x i32> splat (i32 9), i64 0)
+  %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
+  ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 4 x i32> @insert_mul() {
+; CHECK-LABEL: @insert_mul(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MUL:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 7), i64 4)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 1), i64 4)
+  %mul = mul <vscale x 4 x i32> %0, splat (i32 7)
+  ret <vscale x 4 x i32> %mul
+}
+
+define <vscale x 4 x i32> @insert_add() {
+; CHECK-LABEL: @insert_add(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 16), i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 5), i64 0)
+  %add = add <vscale x 4 x i32> %0, splat (i32 11)
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 4 x i32> @insert_add_non_splat_subvector() {
+; CHECK-LABEL: @insert_add_non_splat_subvector(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 101, i32 102, i32 103, i32 104>, i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i64 0)
+  %add = add <vscale x 4 x i32> %0, splat (i32 100)
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 4 x float> @insert_add_fp() {
+; CHECK-LABEL: @insert_add_fp(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> splat (float 6.250000e+00), <4 x float> splat (float 5.500000e+00), i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x float> [[ADD]]
+;
+entry:
+  %0 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> splat(float 1.25), <4 x float> splat (float 0.5), i64 0)
+  %add = fadd <vscale x 4 x float> %0, splat (float 5.0)
+  ret <vscale x 4 x float> %add
+}
+
+define <vscale x 8 x i32> @insert_add_scalable_subvector() {
+; CHECK-LABEL: @insert_add_scalable_subvector(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat (i32 20), <vscale x 4 x i32> splat (i32 -4), i64 0)
+; CHECK-NEXT:    ret <vscale x 8 x i32> [[ADD]]
+;
+entry:
+  %0 = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat(i32 16), <vscale x 4 x i32> splat (i32 -8), i64 0)
+  %add = add <vscale x 8 x i32> %0, splat (i32 4)
+  ret <vscale x 8 x i32> %add
+}
+
+define <vscale x 4 x i32> @insert_sub() {
+; CHECK-LABEL: @insert_sub(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> zeroinitializer, i64 8)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 11), i64 8)
+  %sub = add <vscale x 4 x i32> %0, splat (i32 -11)
+  ret <vscale x 4 x i32> %sub
+}
+
+define <vscale x 4 x i32> @insert_and_partially_undef() {
+; CHECK-LABEL: @insert_and_partially_undef(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[AND:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> zeroinitializer, <4 x i32> splat (i32 4), i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[AND]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 6), i64 0)
+  %and = and <vscale x 4 x i32> %0, splat (i32 4)
+  ret <vscale x 4 x i32> %and
+}
+
+define <vscale x 4 x i32> @insert_fold_chain() {
+; CHECK-LABEL: @insert_fold_chain(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 11), <4 x i32> splat (i32 8), i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 21), <4 x i32> splat (i32 12), i64 0)
+  %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
+  %add = add <vscale x 4 x i32> %div, splat (i32 4)
+  ret <vscale x 4 x i32> %add
+}
+
+; TODO: This could be folded more.
+define <vscale x 4 x i32> @insert_add_both_insert_vector() {
+; CHECK-LABEL: @insert_add_both_insert_vector(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 10), <4 x i32> splat (i32 5), i64 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 -1), <4 x i32> splat (i32 2), i64 0)
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 10), <4 x i32> splat (i32 5), i64 0)
+  %1 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 -1), <4 x i32> splat (i32 2), i64 0)
+  %add = add <vscale x 4 x i32> %0, %1
+  ret <vscale x 4 x i32> %add
+}
diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder.ll b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
index 66592b0..fdeb497 100644
--- a/llvm/test/Transforms/LoopVectorize/constantfolder.ll
+++ b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
@@ -288,3 +288,72 @@ loop.latch:
 exit:
   ret void
 }
+
+define void @const_fold_binaryintrinsic(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_binaryintrinsic(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    store i64 3, ptr [[DST]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %const.0 = xor i64 %d, %d
+  %trunc = call i64 @llvm.umax.i64(i64 %const.0, i64 3)
+  store i64 %trunc, ptr %dst, align 2
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, 100
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @const_fold_widegep(ptr noalias %A, ptr noalias %B, i64 %d) {
+; CHECK-LABEL: define void @const_fold_widegep(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    store ptr [[A]], ptr [[B]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %const.0 = xor i64 %d, %d
+  %gep.A = getelementptr i64, ptr %A, i64 %const.0
+  %gep.B = getelementptr i64, ptr %B, i64 %const.0
+  store ptr %gep.A, ptr %gep.B
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exit.cond = icmp ult i64 %iv.next, 100
+  br i1 %exit.cond, label %loop, label %exit
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_muladd.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_muladd.ll
index 27e8fd0..3b61750 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_muladd.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_muladd.ll
@@ -6,7 +6,7 @@ target triple = "aarch64"
 
 ; This function (a 16x reduction of a[i] * b[i]) should be vectorized successfully.
 
-define dso_local nofpclass(nan inf) float @vmlaq(ptr noundef %0, ptr noundef %1) #0 {
+define dso_local nofpclass(nan inf) float @vmlaq(ptr noundef %0, ptr noundef %1) {
 ; CHECK-LABEL: define dso_local nofpclass(nan inf) float @vmlaq
 ; CHECK-SAME: (ptr noundef readonly captures(none) [[TMP0:%.*]], ptr noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x float>, ptr [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]]
@@ -21,9 +21,9 @@ define dso_local nofpclass(nan inf) float @vmlaq(ptr noundef %0, ptr noundef %1)
   %6 = alloca i32, align 4
   store ptr %0, ptr %3, align 8, !tbaa !4
   store ptr %1, ptr %4, align 8, !tbaa !4
-  call void @llvm.lifetime.start.p0(ptr %5) #2
+  call void @llvm.lifetime.start.p0(ptr %5)
   store float 0.000000e+00, ptr %5, align 4, !tbaa !9
-  call void @llvm.lifetime.start.p0(ptr %6) #2
+  call void @llvm.lifetime.start.p0(ptr %6)
   store i32 0, ptr %6, align 4, !tbaa !11
   br label %7
 
@@ -33,7 +33,7 @@ define dso_local nofpclass(nan inf) float @vmlaq(ptr noundef %0, ptr noundef %1)
   br i1 %9, label %11, label %10
 
 10:                                               ; preds = %7
-  call void @llvm.lifetime.end.p0(ptr %6) #2
+  call void @llvm.lifetime.end.p0(ptr %6)
   br label %28
 
 11:                                               ; preds = %7
@@ -61,16 +61,12 @@ define dso_local nofpclass(nan inf) float @vmlaq(ptr noundef %0, ptr noundef %1)
 
 28:                                               ; preds = %10
   %29 = load float, ptr %5, align 4, !tbaa !9
-  call void @llvm.lifetime.end.p0(ptr %5) #2
+  call void @llvm.lifetime.end.p0(ptr %5)
   ret float %29
 }
 
-declare void @llvm.lifetime.start.p0(ptr captures(none)) #1
-declare void @llvm.lifetime.end.p0(ptr captures(none)) #1
-
-attributes #0 = { nounwind uwtable "frame-pointer"="non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" "unsafe-fp-math"="true" }
-attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-attributes #2 = { nounwind }
+declare void @llvm.lifetime.start.p0(ptr captures(none))
+declare void @llvm.lifetime.end.p0(ptr captures(none))
 
 !llvm.module.flags = !{!0, !1, !2}
 !llvm.ident = !{!3}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_submuladd.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_submuladd.ll
index 68bfbc1..eefde9d 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_submuladd.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_submuladd.ll
@@ -6,7 +6,7 @@ target triple = "aarch64"
 
 ; This function (a more complex reduction of (a[i] - b[i]) * itself) should be vectorized successfully.
 
-define dso_local noundef nofpclass(nan inf) float @_Z4testPKfS0_ii(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 noundef %3) #0 {
+define dso_local noundef nofpclass(nan inf) float @_Z4testPKfS0_ii(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 noundef %3) {
 ; CHECK-LABEL: define dso_local noundef nofpclass(nan inf) float @_Z4testPKfS0_ii
 ; CHECK-SAME: (ptr noundef readonly captures(none) [[TMP0:%.*]], ptr noundef readonly captures(none) [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  .preheader.i:
@@ -125,7 +125,7 @@ define dso_local noundef nofpclass(nan inf) float @_Z4testPKfS0_ii(ptr noundef %
   ret float %13
 }
 
-define internal noundef nofpclass(nan inf) float @_ZL6reduceILi7EEfPKfS1_ii(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 noundef %3) #1 {
+define internal noundef nofpclass(nan inf) float @_ZL6reduceILi7EEfPKfS1_ii(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 noundef %3) {
   %5 = alloca ptr, align 8
   %6 = alloca ptr, align 8
   %7 = alloca i32, align 4
@@ -143,15 +143,15 @@ define internal noundef nofpclass(nan inf) float @_ZL6reduceILi7EEfPKfS1_ii(ptr
   store ptr %1, ptr %6, align 8, !tbaa !4
   store i32 %2, ptr %7, align 4, !tbaa !9
   store i32 %3, ptr %8, align 4, !tbaa !9
-  call void @llvm.lifetime.start.p0(ptr %9) #3
+  call void @llvm.lifetime.start.p0(ptr %9)
   store i32 3, ptr %9, align 4, !tbaa !9
-  call void @llvm.lifetime.start.p0(ptr %10) #3
+  call void @llvm.lifetime.start.p0(ptr %10)
   store i32 3, ptr %10, align 4, !tbaa !9
-  call void @llvm.lifetime.start.p0(ptr %11) #3
+  call void @llvm.lifetime.start.p0(ptr %11)
   store i32 7, ptr %11, align 4, !tbaa !9
-  call void @llvm.lifetime.start.p0(ptr %12) #3
+  call void @llvm.lifetime.start.p0(ptr %12)
   store float 0.000000e+00, ptr %12, align 4, !tbaa !11
-  call void @llvm.lifetime.start.p0(ptr %13) #3
+  call void @llvm.lifetime.start.p0(ptr %13)
   store i32 0, ptr %13, align 4, !tbaa !9
   br label %18
 
@@ -162,13 +162,13 @@ define internal noundef nofpclass(nan inf) float @_ZL6reduceILi7EEfPKfS1_ii(ptr
 
 21:                                               ; preds = %18
   store i32 2, ptr %14, align 4
-  call void @llvm.lifetime.end.p0(ptr %13) #3
+  call void @llvm.lifetime.end.p0(ptr %13)
   br label %62
 
 22:                                               ; preds = %18
-  call void @llvm.lifetime.start.p0(ptr %15) #3
+  call void @llvm.lifetime.start.p0(ptr %15)
   store float 0.000000e+00, ptr %15, align 4, !tbaa !11
-  call void @llvm.lifetime.start.p0(ptr %16) #3
+  call void @llvm.lifetime.start.p0(ptr %16)
   store i32 0, ptr %16, align 4, !tbaa !9
   br label %23
 
@@ -179,11 +179,11 @@ define internal noundef nofpclass(nan inf) float @_ZL6reduceILi7EEfPKfS1_ii(ptr
 
 26:                                               ; preds = %23
   store i32 5, ptr %14, align 4
-  call void @llvm.lifetime.end.p0(ptr %16) #3
+  call void @llvm.lifetime.end.p0(ptr %16)
   br label %47
 
 27:                                               ; preds = %23
-  call void @llvm.lifetime.start.p0(ptr %17) #3
+  call void @llvm.lifetime.start.p0(ptr %17)
   %28 = load ptr, ptr %5, align 8, !tbaa !4
   %29 = load i32, ptr %16, align 4, !tbaa !9
   %30 = sext i32 %29 to i64
@@ -202,7 +202,7 @@ define internal noundef nofpclass(nan inf) float @_ZL6reduceILi7EEfPKfS1_ii(ptr
   %42 = load float, ptr %15, align 4, !tbaa !11
   %43 = fadd fast float %42, %41
   store float %43, ptr %15, align 4, !tbaa !11
-  call void @llvm.lifetime.end.p0(ptr %17) #3
+  call void @llvm.lifetime.end.p0(ptr %17)
   br label %44
 
 44:                                               ; preds = %27
@@ -226,7 +226,7 @@ define internal noundef nofpclass(nan inf) float @_ZL6reduceILi7EEfPKfS1_ii(ptr
   %57 = load float, ptr %12, align 4, !tbaa !11
   %58 = fadd fast float %57, %56
   store float %58, ptr %12, align 4, !tbaa !11
-  call void @llvm.lifetime.end.p0(ptr %15) #3
+  call void @llvm.lifetime.end.p0(ptr %15)
   br label %59
 
 59:                                               ; preds = %47
@@ -238,20 +238,15 @@ define internal noundef nofpclass(nan inf) float @_ZL6reduceILi7EEfPKfS1_ii(ptr
 62:                                               ; preds = %21
   %63 = load float, ptr %12, align 4, !tbaa !11
   store i32 1, ptr %14, align 4
-  call void @llvm.lifetime.end.p0(ptr %12) #3
-  call void @llvm.lifetime.end.p0(ptr %11) #3
-  call void @llvm.lifetime.end.p0(ptr %10) #3
-  call void @llvm.lifetime.end.p0(ptr %9) #3
+  call void @llvm.lifetime.end.p0(ptr %12)
+  call void @llvm.lifetime.end.p0(ptr %11)
+  call void @llvm.lifetime.end.p0(ptr %10)
+  call void @llvm.lifetime.end.p0(ptr %9)
   ret float %63
 }
 
-declare void @llvm.lifetime.start.p0(ptr captures(none)) #2
-declare void @llvm.lifetime.end.p0(ptr captures(none)) #2
-
-attributes #0 = { mustprogress uwtable "frame-pointer"="non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" "unsafe-fp-math"="true" }
-attributes #1 = { inlinehint mustprogress nounwind uwtable "frame-pointer"="non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" "unsafe-fp-math"="true" }
-attributes #2 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-attributes #3 = { nounwind }
+declare void @llvm.lifetime.start.p0(ptr captures(none))
+declare void @llvm.lifetime.end.p0(ptr captures(none))
 
 !llvm.module.flags = !{!0, !1, !2}
 !llvm.ident = !{!3}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
index 92e625d..82ecc3a 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
@@ -10,7 +10,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
 target triple = "aarch64"
 
 ; Function Attrs: nounwind uwtable
-define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 noundef %ip2) #0 {
+define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 noundef %ip2) {
 ; CHECK-LABEL: define range(i32 0, 65536) i32 @slpordering(
 ; CHECK-SAME: ptr noundef readonly captures(none) [[P1:%.*]], i32 noundef [[IP1:%.*]], ptr noundef readonly captures(none) [[P2:%.*]], i32 noundef [[IP2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
@@ -136,14 +136,14 @@ entry:
   store i32 %ip1, ptr %ip1.addr, align 4, !tbaa !8
   store ptr %p2, ptr %p2.addr, align 8, !tbaa !4
   store i32 %ip2, ptr %ip2.addr, align 4, !tbaa !8
-  call void @llvm.lifetime.start.p0(ptr %emp) #2
-  call void @llvm.lifetime.start.p0(ptr %r0) #2
-  call void @llvm.lifetime.start.p0(ptr %r1) #2
-  call void @llvm.lifetime.start.p0(ptr %r2) #2
-  call void @llvm.lifetime.start.p0(ptr %r3) #2
-  call void @llvm.lifetime.start.p0(ptr %sum) #2
+  call void @llvm.lifetime.start.p0(ptr %emp)
+  call void @llvm.lifetime.start.p0(ptr %r0)
+  call void @llvm.lifetime.start.p0(ptr %r1)
+  call void @llvm.lifetime.start.p0(ptr %r2)
+  call void @llvm.lifetime.start.p0(ptr %r3)
+  call void @llvm.lifetime.start.p0(ptr %sum)
   store i32 0, ptr %sum, align 4, !tbaa !8
-  call void @llvm.lifetime.start.p0(ptr %i) #2
+  call void @llvm.lifetime.start.p0(ptr %i)
   store i32 0, ptr %i, align 4, !tbaa !8
   br label %for.cond
 
@@ -153,7 +153,7 @@ for.cond:                                         ; preds = %for.inc, %entry
   br i1 %cmp, label %for.body, label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %for.cond
-  call void @llvm.lifetime.end.p0(ptr %i) #2
+  call void @llvm.lifetime.end.p0(ptr %i)
   br label %for.end
 
 for.body:                                         ; preds = %for.cond
@@ -241,22 +241,22 @@ for.body:                                         ; preds = %for.cond
   %shl42 = shl i32 %sub41, 16
   %rdd43 = add nsw i32 %sub36, %shl42
   store i32 %rdd43, ptr %r3, align 4, !tbaa !8
-  call void @llvm.lifetime.start.p0(ptr %e0) #2
+  call void @llvm.lifetime.start.p0(ptr %e0)
   %33 = load i32, ptr %r0, align 4, !tbaa !8
   %34 = load i32, ptr %r1, align 4, !tbaa !8
   %rdd44 = add i32 %33, %34
   store i32 %rdd44, ptr %e0, align 4, !tbaa !8
-  call void @llvm.lifetime.start.p0(ptr %e1) #2
+  call void @llvm.lifetime.start.p0(ptr %e1)
   %35 = load i32, ptr %r0, align 4, !tbaa !8
   %36 = load i32, ptr %r1, align 4, !tbaa !8
   %sub45 = sub i32 %35, %36
   store i32 %sub45, ptr %e1, align 4, !tbaa !8
-  call void @llvm.lifetime.start.p0(ptr %e2) #2
+  call void @llvm.lifetime.start.p0(ptr %e2)
   %37 = load i32, ptr %r2, align 4, !tbaa !8
   %38 = load i32, ptr %r3, align 4, !tbaa !8
   %rdd46 = add i32 %37, %38
   store i32 %rdd46, ptr %e2, align 4, !tbaa !8
-  call void @llvm.lifetime.start.p0(ptr %e3) #2
+  call void @llvm.lifetime.start.p0(ptr %e3)
   %39 = load i32, ptr %r2, align 4, !tbaa !8
   %40 = load i32, ptr %r3, align 4, !tbaa !8
   %sub47 = sub i32 %39, %40
@@ -293,10 +293,10 @@ for.body:                                         ; preds = %for.cond
   %rrrayidx61 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 %idxprom60
   %rrrayidx62 = getelementptr inbounds [4 x i32], ptr %rrrayidx61, i64 0, i64 3
   store i32 %sub59, ptr %rrrayidx62, align 4, !tbaa !8
-  call void @llvm.lifetime.end.p0(ptr %e3) #2
-  call void @llvm.lifetime.end.p0(ptr %e2) #2
-  call void @llvm.lifetime.end.p0(ptr %e1) #2
-  call void @llvm.lifetime.end.p0(ptr %e0) #2
+  call void @llvm.lifetime.end.p0(ptr %e3)
+  call void @llvm.lifetime.end.p0(ptr %e2)
+  call void @llvm.lifetime.end.p0(ptr %e1)
+  call void @llvm.lifetime.end.p0(ptr %e0)
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
@@ -316,7 +316,7 @@ for.inc:                                          ; preds = %for.body
   br label %for.cond, !llvm.loop !11
 
 for.end:                                          ; preds = %for.cond.cleanup
-  call void @llvm.lifetime.start.p0(ptr %i65) #2
+  call void @llvm.lifetime.start.p0(ptr %i65)
   store i32 0, ptr %i65, align 4, !tbaa !8
   br label %for.cond66
 
@@ -326,11 +326,11 @@ for.cond66:                                       ; preds = %for.inc114, %for.en
   br i1 %cmp67, label %for.body70, label %for.cond.cleanup69
 
 for.cond.cleanup69:                               ; preds = %for.cond66
-  call void @llvm.lifetime.end.p0(ptr %i65) #2
+  call void @llvm.lifetime.end.p0(ptr %i65)
   br label %for.end116
 
 for.body70:                                       ; preds = %for.cond66
-  call void @llvm.lifetime.start.p0(ptr %e071) #2
+  call void @llvm.lifetime.start.p0(ptr %e071)
   %rrrayidx72 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 0
   %59 = load i32, ptr %i65, align 4, !tbaa !8
   %idxprom73 = sext i32 %59 to i64
@@ -343,7 +343,7 @@ for.body70:                                       ; preds = %for.cond66
   %62 = load i32, ptr %rrrayidx77, align 4, !tbaa !8
   %rdd78 = add i32 %60, %62
   store i32 %rdd78, ptr %e071, align 4, !tbaa !8
-  call void @llvm.lifetime.start.p0(ptr %e179) #2
+  call void @llvm.lifetime.start.p0(ptr %e179)
   %rrrayidx80 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 0
   %63 = load i32, ptr %i65, align 4, !tbaa !8
   %idxprom81 = sext i32 %63 to i64
@@ -356,7 +356,7 @@ for.body70:                                       ; preds = %for.cond66
   %66 = load i32, ptr %rrrayidx85, align 4, !tbaa !8
   %sub86 = sub i32 %64, %66
   store i32 %sub86, ptr %e179, align 4, !tbaa !8
-  call void @llvm.lifetime.start.p0(ptr %e287) #2
+  call void @llvm.lifetime.start.p0(ptr %e287)
   %rrrayidx88 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 2
   %67 = load i32, ptr %i65, align 4, !tbaa !8
   %idxprom89 = sext i32 %67 to i64
@@ -369,7 +369,7 @@ for.body70:                                       ; preds = %for.cond66
   %70 = load i32, ptr %rrrayidx93, align 4, !tbaa !8
   %rdd94 = add i32 %68, %70
   store i32 %rdd94, ptr %e287, align 4, !tbaa !8
-  call void @llvm.lifetime.start.p0(ptr %e395) #2
+  call void @llvm.lifetime.start.p0(ptr %e395)
   %rrrayidx96 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 2
   %71 = load i32, ptr %i65, align 4, !tbaa !8
   %idxprom97 = sext i32 %71 to i64
@@ -398,10 +398,10 @@ for.body70:                                       ; preds = %for.cond66
   %82 = load i32, ptr %e395, align 4, !tbaa !8
   %sub106 = sub nsw i32 %81, %82
   store i32 %sub106, ptr %r3, align 4, !tbaa !8
-  call void @llvm.lifetime.end.p0(ptr %e395) #2
-  call void @llvm.lifetime.end.p0(ptr %e287) #2
-  call void @llvm.lifetime.end.p0(ptr %e179) #2
-  call void @llvm.lifetime.end.p0(ptr %e071) #2
+  call void @llvm.lifetime.end.p0(ptr %e395)
+  call void @llvm.lifetime.end.p0(ptr %e287)
+  call void @llvm.lifetime.end.p0(ptr %e179)
+  call void @llvm.lifetime.end.p0(ptr %e071)
   %83 = load i32, ptr %r0, align 4, !tbaa !8
   %call = call i32 @twoabs(i32 noundef %83)
   %84 = load i32, ptr %r1, align 4, !tbaa !8
@@ -432,28 +432,28 @@ for.end116:                                       ; preds = %for.cond.cleanup69
   %shr = lshr i32 %90, 16
   %rdd119 = add i32 %conv118, %shr
   %shr120 = lshr i32 %rdd119, 1
-  call void @llvm.lifetime.end.p0(ptr %sum) #2
-  call void @llvm.lifetime.end.p0(ptr %r3) #2
-  call void @llvm.lifetime.end.p0(ptr %r2) #2
-  call void @llvm.lifetime.end.p0(ptr %r1) #2
-  call void @llvm.lifetime.end.p0(ptr %r0) #2
-  call void @llvm.lifetime.end.p0(ptr %emp) #2
+  call void @llvm.lifetime.end.p0(ptr %sum)
+  call void @llvm.lifetime.end.p0(ptr %r3)
+  call void @llvm.lifetime.end.p0(ptr %r2)
+  call void @llvm.lifetime.end.p0(ptr %r1)
+  call void @llvm.lifetime.end.p0(ptr %r0)
+  call void @llvm.lifetime.end.p0(ptr %emp)
   ret i32 %shr120
 }
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.start.p0(ptr nocapture) #1
+declare void @llvm.lifetime.start.p0(ptr nocapture)
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.end.p0(ptr nocapture) #1
+declare void @llvm.lifetime.end.p0(ptr nocapture)
 
 ; Function Attrs: nounwind uwtable
-define internal i32 @twoabs(i32 noundef %r) #0 {
+define internal i32 @twoabs(i32 noundef %r) {
 entry:
   %r.addr = alloca i32, align 4
   %s = alloca i32, align 4
   store i32 %r, ptr %r.addr, align 4, !tbaa !8
-  call void @llvm.lifetime.start.p0(ptr %s) #2
+  call void @llvm.lifetime.start.p0(ptr %s)
   %0 = load i32, ptr %r.addr, align 4, !tbaa !8
   %shr = lshr i32 %0, 15
   %rnd = and i32 %shr, 65537
@@ -464,14 +464,10 @@ entry:
   %rdd = add i32 %1, %2
   %3 = load i32, ptr %s, align 4, !tbaa !8
   %xor = xor i32 %rdd, %3
-  call void @llvm.lifetime.end.p0(ptr %s) #2
+  call void @llvm.lifetime.end.p0(ptr %s)
   ret i32 %xor
 }
 
-attributes #0 = { nounwind uwtable "frame-pointer"="non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" "unsafe-fp-math"="true" }
-attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-attributes #2 = { nounwind }
-
 !4 = !{!5, !5, i64 0}
 !5 = !{!"any pointer", !6, i64 0}
 !6 = !{!"omnipotent char", !7, i64 0}
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_add_q7.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_add_q7.ll
index 7fb72e6..811957c 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_add_q7.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_add_q7.ll
@@ -170,5 +170,5 @@ unreachable:                                      ; preds = %cleanup
 declare void @llvm.lifetime.start.p0(ptr nocapture)
 declare void @llvm.lifetime.end.p0(ptr nocapture)
 
-attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" "unsafe-fp-math"="true" }
-attributes #1 = { alwaysinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" }
+attributes #1 = { alwaysinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" }
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_fill_q7.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_fill_q7.ll
index 436f848a..6735577 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_fill_q7.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_fill_q7.ll
@@ -9,7 +9,7 @@ target triple = "thumbv6m-none-none-eabi"
 ; should be deleted, too.
 
 ; Function Attrs: nounwind
-define dso_local void @arm_fill_q7(i8 signext %value, ptr %pDst, i32 %blockSize) #0 {
+define dso_local void @arm_fill_q7(i8 signext %value, ptr %pDst, i32 %blockSize) {
 ; OLDPM-LABEL: @arm_fill_q7(
 ; OLDPM-NEXT:  entry:
 ; OLDPM-NEXT:    [[CMP_NOT20:%.*]] = icmp ult i32 [[BLOCKSIZE:%.*]], 4
@@ -59,8 +59,8 @@ entry:
   store i8 %value, ptr %value.addr, align 1, !tbaa !3
   store ptr %pDst, ptr %pDst.addr, align 4, !tbaa !6
   store i32 %blockSize, ptr %blockSize.addr, align 4, !tbaa !8
-  call void @llvm.lifetime.start.p0(ptr %blkCnt) #3
-  call void @llvm.lifetime.start.p0(ptr %packedValue) #3
+  call void @llvm.lifetime.start.p0(ptr %blkCnt)
+  call void @llvm.lifetime.start.p0(ptr %packedValue)
   %0 = load i8, ptr %value.addr, align 1, !tbaa !3
   %conv = sext i8 %0 to i32
   %shl = shl i32 %conv, 0
@@ -122,23 +122,23 @@ while.body16:                                     ; preds = %while.cond13
   br label %while.cond13, !llvm.loop !12
 
 while.end18:                                      ; preds = %while.cond13
-  call void @llvm.lifetime.end.p0(ptr %packedValue) #3
-  call void @llvm.lifetime.end.p0(ptr %blkCnt) #3
+  call void @llvm.lifetime.end.p0(ptr %packedValue)
+  call void @llvm.lifetime.end.p0(ptr %blkCnt)
   ret void
 }
 
 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0(ptr nocapture) #1
+declare void @llvm.lifetime.start.p0(ptr nocapture)
 
 ; Function Attrs: alwaysinline nounwind
-define internal void @write_q7x4_ia(ptr %pQ7, i32 %value) #2 {
+define internal void @write_q7x4_ia(ptr %pQ7, i32 %value) {
 entry:
   %pQ7.addr = alloca ptr, align 4
   %value.addr = alloca i32, align 4
   %val = alloca i32, align 4
   store ptr %pQ7, ptr %pQ7.addr, align 4, !tbaa !6
   store i32 %value, ptr %value.addr, align 4, !tbaa !8
-  call void @llvm.lifetime.start.p0(ptr %val) #3
+  call void @llvm.lifetime.start.p0(ptr %val)
   %0 = load i32, ptr %value.addr, align 4, !tbaa !8
   store i32 %0, ptr %val, align 4, !tbaa !8
   %1 = load i32, ptr %val, align 4, !tbaa !8
@@ -175,17 +175,12 @@ entry:
   %14 = load ptr, ptr %13, align 4, !tbaa !6
   %add.ptr = getelementptr inbounds i8, ptr %14, i32 4
   store ptr %add.ptr, ptr %13, align 4, !tbaa !6
-  call void @llvm.lifetime.end.p0(ptr %val) #3
+  call void @llvm.lifetime.end.p0(ptr %val)
   ret void
 }
 
 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0(ptr nocapture) #1
-
-attributes #0 = { nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m0plus" "target-features"="+armv6-m,+strict-align,+thumb-mode,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-dotprod,-dsp,-fp16fml,-fullfp16,-hwdiv,-hwdiv-arm,-i8mm,-lob,-mve,-mve.fp,-ras,-sb,-sha2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { argmemonly nofree nosync nounwind willreturn }
-attributes #2 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m0plus" "target-features"="+armv6-m,+strict-align,+thumb-mode,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-dotprod,-dsp,-fp16fml,-fullfp16,-hwdiv,-hwdiv-arm,-i8mm,-lob,-mve,-mve.fp,-ras,-sb,-sha2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind }
+declare void @llvm.lifetime.end.p0(ptr nocapture)
 
 !llvm.module.flags = !{!0, !1}
 !llvm.ident = !{!2}
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
index c186207..4274719 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
@@ -133,7 +133,7 @@ declare void @llvm.lifetime.start.p0(ptr nocapture) #1
 declare i32 @llvm.arm.mve.addv.v16i8(<16 x i8>, i32) #2
 declare void @llvm.lifetime.end.p0(ptr nocapture) #1
 
-attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-pacbti,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-pacbti,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" }
 attributes #1 = { argmemonly nocallback nofree nosync nounwind willreturn }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
index 42fdafb..5127b7d 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
@@ -216,7 +216,7 @@ unreachable:                                      ; preds = %cleanup
 
 declare void @llvm.lifetime.end.p0(ptr nocapture) #1
 
-attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" }
 attributes #1 = { argmemonly nofree nosync nounwind willreturn }
-attributes #2 = { alwaysinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" "unsafe-fp-math"="true" }
+attributes #2 = { alwaysinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" }
 attributes #3 = { nounwind }
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
index 8e25c9c..c5f56d3 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
@@ -15,7 +15,7 @@
 ; Ideally, this should reach the backend with 1 fmul, 1 fsub, 1 fadd, and 1 shuffle.
 ; That may require some coordination between VectorCombine, SLP, and other passes.
 
-define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) #0 {
+define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) {
 ; CHECK-LABEL: @buildvector_mul_addsub_ps128(
 ; CHECK-NEXT:    [[A:%.*]] = fmul <4 x float> [[C:%.*]], [[D:%.*]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = fsub <4 x float> [[A]], [[B:%.*]]
@@ -43,7 +43,7 @@ define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D,
   ret <4 x float> %vecinsert4
 }
 
-define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) #0 {
+define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) {
 ; CHECK-LABEL: @buildvector_mul_addsub_pd128(
 ; CHECK-NEXT:    [[A:%.*]] = fmul <2 x double> [[C:%.*]], [[D:%.*]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = fsub <2 x double> [[A]], [[B:%.*]]
@@ -63,7 +63,7 @@ define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double>
   ret <2 x double> %vecinsert2
 }
 
-define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) #0 {
+define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) {
 ; SSE2-LABEL: @buildvector_mul_addsub_ps256(
 ; SSE2-NEXT:    [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]]
 ; SSE2-NEXT:    [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]]
@@ -123,7 +123,7 @@ define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D,
   ret <8 x float> %vecinsert8
 }
 
-define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) #0 {
+define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) {
 ; CHECK-LABEL: @buildvector_mul_addsub_pd256(
 ; CHECK-NEXT:    [[A:%.*]] = fmul <4 x double> [[C:%.*]], [[D:%.*]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = fsub <4 x double> [[A]], [[B:%.*]]
@@ -151,7 +151,7 @@ define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double>
   ret <4 x double> %vecinsert4
 }
 
-define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) #0 {
+define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) {
 ; SSE2-LABEL: @buildvector_mul_addsub_ps512(
 ; SSE2-NEXT:    [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]]
 ; SSE2-NEXT:    [[TMP1:%.*]] = fsub <16 x float> [[A]], [[B:%.*]]
@@ -243,7 +243,7 @@ define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float>
   ret <16 x float> %vecinsert16
 }
 
-define <16 x float> @buildvector_mul_addsub_ps512_partial(<16 x float> %C, <16 x float> %D, <16 x float> %B) #0 {
+define <16 x float> @buildvector_mul_addsub_ps512_partial(<16 x float> %C, <16 x float> %D, <16 x float> %B) {
 ; SSE-LABEL: @buildvector_mul_addsub_ps512_partial(
 ; SSE-NEXT:    [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]]
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13>
@@ -350,7 +350,7 @@ define <16 x float> @buildvector_mul_addsub_ps512_partial(<16 x float> %C, <16 x
   ret <16 x float> %vecinsert16
 }
 
-define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) #0 {
+define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) {
 ; SSE2-LABEL: @buildvector_mul_addsub_pd512(
 ; SSE2-NEXT:    [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]]
 ; SSE2-NEXT:    [[TMP1:%.*]] = fsub <8 x double> [[A]], [[B:%.*]]
@@ -410,7 +410,7 @@ define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double>
   ret <8 x double> %vecinsert8
 }
 
-define <8 x double> @buildvector_mul_addsub_pd512_partial(<8 x double> %C, <8 x double> %D, <8 x double> %B) #0 {
+define <8 x double> @buildvector_mul_addsub_pd512_partial(<8 x double> %C, <8 x double> %D, <8 x double> %B) {
 ; SSE-LABEL: @buildvector_mul_addsub_pd512_partial(
 ; SSE-NEXT:    [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]]
 ; SSE-NEXT:    [[TMP1:%.*]] = fsub <8 x double> [[A]], [[B:%.*]]
@@ -507,7 +507,7 @@ define <8 x double> @buildvector_mul_addsub_pd512_partial(<8 x double> %C, <8 x
   ret <8 x double> %vecinsert8
 }
 
-define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) #0 {
+define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) {
 ; CHECK-LABEL: @buildvector_mul_subadd_ps128(
 ; CHECK-NEXT:    [[A:%.*]] = fmul <4 x float> [[C:%.*]], [[D:%.*]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = fadd <4 x float> [[A]], [[B:%.*]]
@@ -535,7 +535,7 @@ define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D,
   ret <4 x float> %vecinsert4
 }
 
-define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) #0 {
+define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) {
 ; CHECK-LABEL: @buildvector_mul_subadd_pd128(
 ; CHECK-NEXT:    [[A:%.*]] = fmul <2 x double> [[C:%.*]], [[D:%.*]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = fadd <2 x double> [[A]], [[B:%.*]]
@@ -555,7 +555,7 @@ define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double>
   ret <2 x double> %vecinsert2
 }
 
-define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) #0 {
+define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) {
 ; SSE2-LABEL: @buildvector_mul_subadd_ps256(
 ; SSE2-NEXT:    [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]]
 ; SSE2-NEXT:    [[TMP0:%.*]] = fadd <8 x float> [[A]], [[B:%.*]]
@@ -634,7 +634,7 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D,
   ret <8 x float> %vecinsert8
 }
 
-define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) #0 {
+define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) {
 ; CHECK-LABEL: @buildvector_mul_subadd_pd256(
 ; CHECK-NEXT:    [[A:%.*]] = fmul <4 x double> [[C:%.*]], [[D:%.*]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = fadd <4 x double> [[A]], [[B:%.*]]
@@ -662,7 +662,7 @@ define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double>
   ret <4 x double> %vecinsert4
 }
 
-define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) #0 {
+define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) {
 ; SSE-LABEL: @buildvector_mul_subadd_ps512(
 ; SSE-NEXT:    [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]]
 ; SSE-NEXT:    [[TMP1:%.*]] = fadd <16 x float> [[A]], [[B:%.*]]
@@ -756,7 +756,7 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float>
   ret <16 x float> %vecinsert16
 }
 
-define <16 x float> @buildvector_mul_subadd_ps512_partial(<16 x float> %C, <16 x float> %D, <16 x float> %B) #0 {
+define <16 x float> @buildvector_mul_subadd_ps512_partial(<16 x float> %C, <16 x float> %D, <16 x float> %B) {
 ; SSE-LABEL: @buildvector_mul_subadd_ps512_partial(
 ; SSE-NEXT:    [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]]
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13>
@@ -863,7 +863,7 @@ define <16 x float> @buildvector_mul_subadd_ps512_partial(<16 x float> %C, <16 x
   ret <16 x float> %vecinsert16
 }
 
-define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) #0 {
+define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) {
 ; SSE-LABEL: @buildvector_mul_subadd_pd512(
 ; SSE-NEXT:    [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]]
 ; SSE-NEXT:    [[TMP0:%.*]] = fadd <8 x double> [[A]], [[B:%.*]]
@@ -925,7 +925,7 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double>
   ret <8 x double> %vecinsert8
 }
 
-define <8 x double> @buildvector_mul_subadd_pd512_partial(<8 x double> %C, <8 x double> %D, <8 x double> %B) #0 {
+define <8 x double> @buildvector_mul_subadd_pd512_partial(<8 x double> %C, <8 x double> %D, <8 x double> %B) {
 ; SSE-LABEL: @buildvector_mul_subadd_pd512_partial(
 ; SSE-NEXT:    [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]]
 ; SSE-NEXT:    [[TMP1:%.*]] = fadd <8 x double> [[A]], [[B:%.*]]
@@ -1021,5 +1021,3 @@ define <8 x double> @buildvector_mul_subadd_pd512_partial(<8 x double> %C, <8 x
   %vecinsert8 = insertelement <8 x double> %vecinsert7, double %add7, i32 7
   ret <8 x double> %vecinsert8
 }
-
-attributes #0 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll
index ae6f4a7..57637d6 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll
@@ -14,7 +14,7 @@ target triple = "x86_64-apple-macosx11.0.0"
 ;        a[i] /= b;
 ;  }
 
-define void @vdiv(ptr %a, float %b) #0 {
+define void @vdiv(ptr %a, float %b) {
 ; CHECK-LABEL: define void @vdiv(
 ; CHECK-SAME: ptr captures(none) [[A:%.*]], float [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
@@ -40,7 +40,7 @@ entry:
   %i = alloca i32, align 4
   store ptr %a, ptr %a.addr, align 8, !tbaa !3
   store float %b, ptr %b.addr, align 4, !tbaa !7
-  call void @llvm.lifetime.start.p0(ptr %i) #2
+  call void @llvm.lifetime.start.p0(ptr %i)
   store i32 0, ptr %i, align 4, !tbaa !9
   br label %for.cond
 
@@ -50,7 +50,7 @@ for.cond:                                         ; preds = %for.inc, %entry
   br i1 %cmp, label %for.body, label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %for.cond
-  call void @llvm.lifetime.end.p0(ptr %i) #2
+  call void @llvm.lifetime.end.p0(ptr %i)
   br label %for.end
 
 for.body:                                         ; preds = %for.cond
@@ -74,12 +74,8 @@ for.end:                                          ; preds = %for.cond.cleanup
   ret void
 }
 
-declare void @llvm.lifetime.start.p0(ptr nocapture) #1
-declare void @llvm.lifetime.end.p0(ptr nocapture) #1
-
-attributes #0 = { nounwind ssp uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" "unsafe-fp-math"="true" }
-attributes #1 = { argmemonly nofree nosync nounwind willreturn }
-attributes #2 = { nounwind }
+declare void @llvm.lifetime.start.p0(ptr nocapture)
+declare void @llvm.lifetime.end.p0(ptr nocapture)
 
 !llvm.module.flags = !{!0, !1}
 !llvm.ident = !{!2}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
index bcdf90c..bfb8554 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
@@ -211,7 +211,7 @@ for.end:
   ret void
 }
 
-attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="skylake-avx512" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-jump-tables"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="skylake-avx512" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "use-soft-float"="false" }
 
 !llvm.module.flags = !{!0, !1}
 !llvm.ident = !{!2}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll
index dd5ff12..987a528 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll
@@ -8,7 +8,7 @@
 target triple = "x86_64--"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-define i32 @add_v4i32(ptr %p) #0 {
+define i32 @add_v4i32(ptr %p) {
 ; CHECK-LABEL: @add_v4i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
@@ -46,7 +46,7 @@ for.end:
   ret i32 %r.0
 }
 
-define signext i16 @mul_v8i16(ptr %p) #0 {
+define signext i16 @mul_v8i16(ptr %p) {
 ; CHECK-LABEL: @mul_v8i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2, !tbaa [[TBAA4:![0-9]+]]
@@ -89,7 +89,7 @@ for.end:
   ret i16 %r.0
 }
 
-define signext i8 @or_v16i8(ptr %p) #0 {
+define signext i8 @or_v16i8(ptr %p) {
 ; CHECK-LABEL: @or_v16i8(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 1, !tbaa [[TBAA6:![0-9]+]]
@@ -134,7 +134,7 @@ for.end:
   ret i8 %r.0
 }
 
-define i32 @smin_v4i32(ptr %p) #0 {
+define i32 @smin_v4i32(ptr %p) {
 ; CHECK-LABEL: @smin_v4i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0]]
@@ -185,7 +185,7 @@ for.end:
   ret i32 %r.0
 }
 
-define i32 @umax_v4i32(ptr %p) #0 {
+define i32 @umax_v4i32(ptr %p) {
 ; CHECK-LABEL: @umax_v4i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0]]
@@ -236,7 +236,7 @@ for.end:
   ret i32 %r.0
 }
 
-define float @fadd_v4i32(ptr %p) #0 {
+define float @fadd_v4i32(ptr %p) {
 ; CHECK-LABEL: @fadd_v4i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4, !tbaa [[TBAA7:![0-9]+]]
@@ -275,7 +275,7 @@ for.end:
   ret float %r.0
 }
 
-define float @fmul_v4i32(ptr %p) #0 {
+define float @fmul_v4i32(ptr %p) {
 ; CHECK-LABEL: @fmul_v4i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4, !tbaa [[TBAA7]]
@@ -315,7 +315,7 @@ for.end:
   ret float %r.0
 }
 
-define float @fmin_v4f32(ptr %p) #0 {
+define float @fmin_v4f32(ptr %p) {
 ; CHECK-LABEL: @fmin_v4f32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4, !tbaa [[TBAA7]]
@@ -440,8 +440,6 @@ entry:
   ret float %call13
 }
 
-attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+avx,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "unsafe-fp-math"="true" "use-soft-float"="false" }
-
 !0 = !{i32 1, !"wchar_size", i32 4}
 !1 = !{i32 7, !"PIC Level", i32 2}
 !2 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git a9fe69c359de653015c39e413e48630d069abe27)"}
diff --git a/llvm/test/Transforms/SLPVectorizer/NVPTX/vectorizable-intrinsic.ll b/llvm/test/Transforms/SLPVectorizer/NVPTX/vectorizable-intrinsic.ll
index 3a56258..0bd9a99 100644
--- a/llvm/test/Transforms/SLPVectorizer/NVPTX/vectorizable-intrinsic.ll
+++ b/llvm/test/Transforms/SLPVectorizer/NVPTX/vectorizable-intrinsic.ll
@@ -5,9 +5,9 @@ target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx--nvidiacl"
 
 ; Vector versions of the intrinsics are scalarized, so keep them scalar
-define <2 x i8> @cltz_test(<2 x i8> %x) #0 {
+define <2 x i8> @cltz_test(<2 x i8> %x) {
 ; CHECK-LABEL: define <2 x i8> @cltz_test(
-; CHECK-SAME: <2 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: <2 x i8> [[X:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i8> [[X]], i32 0
 ; CHECK-NEXT:    [[CALL_I:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP0]], i1 false)
@@ -27,10 +27,9 @@ entry:
   ret <2 x i8> %vecinit2
 }
 
-
-define <2 x i8> @cltz_test_poison(<2 x i8> %x) #0 {
+define <2 x i8> @cltz_test_poison(<2 x i8> %x) {
 ; CHECK-LABEL: define <2 x i8> @cltz_test_poison(
-; CHECK-SAME: <2 x i8> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: <2 x i8> [[X:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i8> [[X]], i32 0
 ; CHECK-NEXT:    [[CALL_I:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP0]], i1 false)
@@ -50,7 +49,5 @@ entry:
   ret <2 x i8> %vecinit2
 }
 
-declare i8 @llvm.ctlz.i8(i8, i1) #3
-
-attributes #0 = { alwaysinline nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
+declare i8 @llvm.ctlz.i8(i8, i1)
+ "unsafe-fp-math"="false"
+\ No newline at end of file
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
index 29589f3..def73d7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113 = type { [4 x float] }
 
 ; Function Attrs: ssp uwtable
-define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %vertices, i1 %arg) #0 align 2 {
+define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %vertices, i1 %arg) align 2 {
 ; CHECK-LABEL: @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[ARG:%.*]], label [[RETURN:%.*]], label [[IF_END:%.*]]
@@ -128,5 +128,3 @@ if.then17.2:                                      ; preds = %if.end22.1
 if.end22.2:                                       ; preds = %if.then17.2, %if.end22.1
   br i1 %arg, label %for.end36, label %for.body
 }
-
-attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_flop7.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_flop7.ll
index fc1bd85..02e18d6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_flop7.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_flop7.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ; Function Attrs: nounwind ssp uwtable
-define void @main(i1 %arg) #0 {
+define void @main(i1 %arg) {
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 %arg, label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
@@ -73,5 +73,3 @@ for.body267:                                      ; preds = %for.body267, %for.b
 for.end300:                                       ; preds = %for.body267, %for.end80
   unreachable
 }
-
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
index f98a569..686befe 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -13,7 +13,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 ;   A[8] = y0; A[8+1] = y1;
 ; }
 
-define i32 @depth(ptr nocapture %A, i32 %m) #0 !dbg !4 {
+define i32 @depth(ptr nocapture %A, i32 %m) !dbg !4 {
 ; CHECK-LABEL: @depth(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:      #dbg_value(ptr [[A:%.*]], [[META12:![0-9]+]], !DIExpression(), [[META18:![0-9]+]])
@@ -60,10 +60,7 @@ for.end:                                          ; preds = %for.body.lr.ph, %en
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
-
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!18, !32}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
index 1b76ee9..4d18bf8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
@@ -6,7 +6,7 @@ target triple = "i386--netbsd"
 @a = common global ptr null, align 4
 
 ; Function Attrs: noreturn nounwind readonly
-define i32 @fn1() #0 {
+define i32 @fn1() {
 ; CHECK-LABEL: define i32 @fn1(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
@@ -37,8 +37,6 @@ do.body:                                          ; preds = %do.body, %entry
   br label %do.body
 }
 
-attributes #0 = { noreturn nounwind readonly "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
 !0 = !{!"any pointer", !1}
 !1 = !{!"omnipotent char", !2}
 !2 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
index 9e8cdc6..538751f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; The GEP has scalar and vector parameters and returns vector of pointers.
 
 ; Function Attrs: noreturn readonly uwtable
-define void @_Z3fn1v(i32 %x, <16 x ptr>%y) local_unnamed_addr #0 {
+define void @_Z3fn1v(i32 %x, <16 x ptr>%y) local_unnamed_addr {
 ; CHECK-LABEL: @_Z3fn1v(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CONV42_LE:%.*]] = sext i32 [[X:%.*]] to i64
@@ -25,6 +25,3 @@ entry:
   %VectorGep208 = getelementptr i32, <16 x ptr> %y, i64 %conv42.le
   unreachable
 }
-
-attributes #0 = { noreturn readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll
index 79bef36..8077595 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; the method implementation and it is not guaranteed that the best option
 ; encountered first (like here).
 
-define double @root_selection(double %a, double %b, double %c, double %d) local_unnamed_addr #0 {
+define double @root_selection(double %a, double %b, double %c, double %d) local_unnamed_addr {
 ; CHECK-LABEL: @root_selection(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[B:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A:%.*]], i32 1
@@ -53,5 +53,3 @@ define double @root_selection(double %a, double %b, double %c, double %d) local_
   %i18 = fadd fast double %i17, %d
   ret double %i18
 }
-
-attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/llvm/test/Transforms/SLPVectorizer/consecutive-access.ll b/llvm/test/Transforms/SLPVectorizer/consecutive-access.ll
index 369ca28..bdc09ed 100644
--- a/llvm/test/Transforms/SLPVectorizer/consecutive-access.ll
+++ b/llvm/test/Transforms/SLPVectorizer/consecutive-access.ll
@@ -8,7 +8,7 @@
 @D = common global [2000 x float] zeroinitializer, align 16
 
 ; Function Attrs: nounwind ssp uwtable
-define void @foo_3double(i32 %u) #0 {
+define void @foo_3double(i32 %u) {
 ; CHECK-LABEL: @foo_3double(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
@@ -65,7 +65,7 @@ entry:
 ; A[C1 + C2*i] are consecutive, if C2 is a power of 2, and C2 > C1 > 0.
 ; Thus, the following code should be vectorized.
 ; Function Attrs: nounwind ssp uwtable
-define void @foo_2double(i32 %u) #0 {
+define void @foo_2double(i32 %u) {
 ; CHECK-LABEL: @foo_2double(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
@@ -104,7 +104,7 @@ entry:
 
 ; Similar to the previous test, but with different datatype.
 ; Function Attrs: nounwind ssp uwtable
-define void @foo_4float(i32 %u) #0 {
+define void @foo_4float(i32 %u) {
 ; CHECK-LABEL: @foo_4float(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
@@ -159,7 +159,7 @@ entry:
 
 ; Similar to the previous tests, but now we are dealing with AddRec SCEV.
 ; Function Attrs: nounwind ssp uwtable
-define i32 @foo_loop(ptr %A, i32 %n) #0 {
+define i32 @foo_loop(ptr %A, i32 %n) {
 ; CHECK-LABEL: @foo_loop(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
@@ -248,7 +248,7 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 ; Similar to foo_2double but with a non-power-of-2 factor and potential
 ; wrapping (both indices wrap or both don't in the same time)
 ; Function Attrs: nounwind ssp uwtable
-define void @foo_2double_non_power_of_2(i32 %u) #0 {
+define void @foo_2double_non_power_of_2(i32 %u) {
 ; CHECK-LABEL: @foo_2double_non_power_of_2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
@@ -289,7 +289,7 @@ entry:
 
 ; Similar to foo_2double_non_power_of_2 but with zext's instead of sext's
 ; Function Attrs: nounwind ssp uwtable
-define void @foo_2double_non_power_of_2_zext(i32 %u) #0 {
+define void @foo_2double_non_power_of_2_zext(i32 %u) {
 ; CHECK-LABEL: @foo_2double_non_power_of_2_zext(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
@@ -332,7 +332,7 @@ entry:
 ; Alternatively, this is like foo_loop, but with a non-power-of-2 factor and
 ; potential wrapping (both indices wrap or both don't in the same time)
 ; Function Attrs: nounwind ssp uwtable
-define i32 @foo_loop_non_power_of_2(ptr %A, i32 %n) #0 {
+define i32 @foo_loop_non_power_of_2(ptr %A, i32 %n) {
 ; CHECK-LABEL: @foo_loop_non_power_of_2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
@@ -438,7 +438,7 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 ;
 ; Make sure we are able to vectorize this from now on:
 ;
-define double @bar(ptr nocapture readonly %a, i32 %n) local_unnamed_addr #0 {
+define double @bar(ptr nocapture readonly %a, i32 %n) local_unnamed_addr {
 ; CHECK-X86-LABEL: @bar(
 ; CHECK-X86-NEXT:  entry:
 ; CHECK-X86-NEXT:    [[CMP15:%.*]] = icmp eq i32 [[N:%.*]], 0
@@ -548,8 +548,6 @@ define void @store_constant_expression(ptr %p) {
   ret void
 }
 
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
 !llvm.ident = !{!0}
 
 !0 = !{!"clang version 3.5.0 "}
diff --git a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll
index 74e52da..385df87 100644
--- a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll
@@ -6,7 +6,7 @@
 ; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer -slp-threshold=0 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,NOTHRESHOLD %}
 ; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer -slp-threshold=-10000 -slp-min-tree-size=0 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,MINTREESIZE %}
 
-define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; CHECK-LABEL: @simple_select(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
@@ -42,7 +42,7 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
 declare void @llvm.assume(i1) nounwind
 
 ; This entire tree is ephemeral, don't vectorize any of it.
-define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; THRESHOLD-LABEL: @simple_select_eph(
 ; THRESHOLD-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
 ; THRESHOLD-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
@@ -199,7 +199,7 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
 
 ; Insert in an order different from the vector indices to make sure it
 ; doesn't matter
-define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; CHECK-LABEL: @simple_select_insert_out_of_order(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
@@ -233,15 +233,15 @@ define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float
   ret <4 x float> %rd
 }
 
-declare void @v4f32_user(<4 x float>) #0
-declare void @f32_user(float) #0
+declare void @v4f32_user(<4 x float>)
+declare void @f32_user(float)
 
 ; Multiple users of the final constructed vector
-define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; CHECK-LABEL: @simple_select_users(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
-; CHECK-NEXT:    call void @v4f32_user(<4 x float> [[TMP2]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    call void @v4f32_user(<4 x float> [[TMP2]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP2]]
 ;
   %c0 = extractelement <4 x i32> %c, i32 0
@@ -268,12 +268,12 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32
   %rb = insertelement <4 x float> %ra, float %s1, i32 1
   %rc = insertelement <4 x float> %rb, float %s2, i32 2
   %rd = insertelement <4 x float> %rc, float %s3, i32 3
-  call void @v4f32_user(<4 x float> %rd) #0
+  call void @v4f32_user(<4 x float> %rd)
   ret <4 x float> %rd
 }
 
 ; Unused insertelement
-define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; CHECK-LABEL: @simple_select_no_users(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
@@ -319,7 +319,7 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
 
 ; Make sure infinite loop doesn't happen which I ran into when trying
 ; to do this backwards this backwards
-define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
+define <4 x i32> @reconstruct(<4 x i32> %c) {
 ; CHECK-LABEL: @reconstruct(
 ; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
 ; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
@@ -342,7 +342,7 @@ define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
   ret <4 x i32> %rd
 }
 
-define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
+define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) {
 ; CHECK-LABEL: @simple_select_v2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[C:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]
@@ -366,7 +366,7 @@ define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %
 ; Make sure when we construct partial vectors, we don't keep
 ; re-visiting the insertelement chains starting with undef
 ; (low cost threshold needed to force this to happen)
-define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; CHECK-LABEL: @simple_select_partial_vector(
 ; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
 ; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
@@ -487,7 +487,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
   ret <4 x double> %i4
 }
 
-define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
+define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr {
 ; CHECK-LABEL: @_vadd256(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
@@ -526,5 +526,3 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr
   %vecinit7.i = insertelement <8 x float> %vecinit6.i, float %add22, i32 7
   ret <8 x float> %vecinit7.i
 }
-
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll
index 0b896f4..37c02d6 100644
--- a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll
@@ -6,7 +6,7 @@
 ; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer -slp-threshold=0 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,NOTHRESHOLD %}
 ; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer -slp-threshold=-10000 -slp-min-tree-size=0 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,MINTREESIZE %}
 
-define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; CHECK-LABEL: @simple_select(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
@@ -39,7 +39,7 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
   ret <4 x float> %rd
 }
 
-define <8 x float> @simple_select2(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <8 x float> @simple_select2(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; CHECK-LABEL: @simple_select2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
@@ -77,7 +77,7 @@ define <8 x float> @simple_select2(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
 declare void @llvm.assume(i1) nounwind
 
 ; This entire tree is ephemeral, don't vectorize any of it.
-define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; THRESHOLD-LABEL: @simple_select_eph(
 ; THRESHOLD-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
 ; THRESHOLD-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
@@ -234,7 +234,7 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
 
 ; Insert in an order different from the vector indices to make sure it
 ; doesn't matter
-define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; CHECK-LABEL: @simple_select_insert_out_of_order(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
@@ -268,15 +268,15 @@ define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float
   ret <4 x float> %rd
 }
 
-declare void @v4f32_user(<4 x float>) #0
-declare void @f32_user(float) #0
+declare void @v4f32_user(<4 x float>)
+declare void @f32_user(float)
 
 ; Multiple users of the final constructed vector
-define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; CHECK-LABEL: @simple_select_users(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
-; CHECK-NEXT:    call void @v4f32_user(<4 x float> [[TMP2]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    call void @v4f32_user(<4 x float> [[TMP2]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP2]]
 ;
   %c0 = extractelement <4 x i32> %c, i32 0
@@ -303,12 +303,12 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32
   %rb = insertelement <4 x float> %ra, float %s1, i32 1
   %rc = insertelement <4 x float> %rb, float %s2, i32 2
   %rd = insertelement <4 x float> %rc, float %s3, i32 3
-  call void @v4f32_user(<4 x float> %rd) #0
+  call void @v4f32_user(<4 x float> %rd)
   ret <4 x float> %rd
 }
 
 ; Unused insertelement
-define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; CHECK-LABEL: @simple_select_no_users(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
@@ -355,7 +355,7 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
 
 ; Make sure infinite loop doesn't happen which I ran into when trying
 ; to do this backwards this backwards
-define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
+define <4 x i32> @reconstruct(<4 x i32> %c) {
 ; CHECK-LABEL: @reconstruct(
 ; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
 ; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
@@ -378,7 +378,7 @@ define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
   ret <4 x i32> %rd
 }
 
-define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
+define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) {
 ; CHECK-LABEL: @simple_select_v2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[C:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]
@@ -402,7 +402,7 @@ define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %
 ; Make sure when we construct partial vectors, we don't keep
 ; re-visiting the insertelement chains starting with zeroinitializer
 ; (low cost threshold needed to force this to happen)
-define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
 ; CHECK-LABEL: @simple_select_partial_vector(
 ; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
 ; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
@@ -523,7 +523,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
   ret <4 x double> %i4
 }
 
-define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
+define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr {
 ; CHECK-LABEL: @_vadd256(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
@@ -562,5 +562,3 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr
   %vecinit7.i = insertelement <8 x float> %vecinit6.i, float %add22, i32 7
   ret <8 x float> %vecinit7.i
 }
-
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/SROA/mem-par-metadata-sroa.ll b/llvm/test/Transforms/SROA/mem-par-metadata-sroa.ll
index bc05971..82ebdaa 100644
--- a/llvm/test/Transforms/SROA/mem-par-metadata-sroa.ll
+++ b/llvm/test/Transforms/SROA/mem-par-metadata-sroa.ll
@@ -33,7 +33,6 @@
 ;     }
 ; }
 
-
 ; ModuleID = '<stdin>'
 source_filename = "mem-par-metadata-sroa1.cpp"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -42,7 +41,7 @@ target triple = "x86_64-unknown-linux-gnu"
 %class.Complex = type { float, float }
 
 ; Function Attrs: norecurse nounwind uwtable
-define void @_Z4testP7Complexl(ptr nocapture %out, i64 %size) local_unnamed_addr #0 {
+define void @_Z4testP7Complexl(ptr nocapture %out, i64 %size) local_unnamed_addr {
 ; CHECK-LABEL: @_Z4testP7Complexl(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
@@ -108,10 +107,7 @@ for.end:                                          ; preds = %for.cond
 }
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1
-
-attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { argmemonly nounwind }
+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1)
 
 !llvm.ident = !{!0}
 
diff --git a/llvm/test/Transforms/SafeStack/ARM/debug.ll b/llvm/test/Transforms/SafeStack/ARM/debug.ll
index 207475a..dfce13a 100644
--- a/llvm/test/Transforms/SafeStack/ARM/debug.ll
+++ b/llvm/test/Transforms/SafeStack/ARM/debug.ll
@@ -42,16 +42,15 @@ declare void @llvm.lifetime.start.p0(ptr nocapture) #2
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #3
 
-declare void @Capture(ptr) local_unnamed_addr #4
+declare void @Capture(ptr) local_unnamed_addr
 
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.lifetime.end.p0(ptr nocapture) #2
 
-attributes #0 = { norecurse nounwind readonly safestack "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+dsp,+neon,+vfp3,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind safestack "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+dsp,+neon,+vfp3,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind readonly safestack }
+attributes #1 = { nounwind safestack }
 attributes #2 = { argmemonly nounwind }
 attributes #3 = { nounwind readnone speculatable }
-attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+dsp,+neon,+vfp3,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #5 = { nounwind }
 
 !llvm.dbg.cu = !{!2}
diff --git a/llvm/test/Transforms/SafeStack/X86/debug-loc.ll b/llvm/test/Transforms/SafeStack/X86/debug-loc.ll
index fcb4935..92197a7 100644
--- a/llvm/test/Transforms/SafeStack/X86/debug-loc.ll
+++ b/llvm/test/Transforms/SafeStack/X86/debug-loc.ll
@@ -44,11 +44,10 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-declare void @Capture(ptr) #2
+declare void @Capture(ptr)
 
-attributes #0 = { safestack uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { safestack uwtable }
 attributes #1 = { nounwind readnone }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!15, !16}
diff --git a/llvm/test/Transforms/SafeStack/X86/debug-loc2.ll b/llvm/test/Transforms/SafeStack/X86/debug-loc2.ll
index e60522f..634231d 100644
--- a/llvm/test/Transforms/SafeStack/X86/debug-loc2.ll
+++ b/llvm/test/Transforms/SafeStack/X86/debug-loc2.ll
@@ -45,7 +45,7 @@ entry:
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.lifetime.start.p0(ptr nocapture) #1
 
-declare void @capture(ptr) #2
+declare void @capture(ptr)
 
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.lifetime.end.p0(ptr nocapture) #1
@@ -55,9 +55,8 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) #3
 
 declare void @llvm.random.metadata.use(metadata)
 
-attributes #0 = { noinline safestack uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline safestack uwtable }
 attributes #1 = { argmemonly nounwind }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #3 = { nounwind readnone }
 attributes #4 = { nounwind }
 
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-symbol-list.ll b/llvm/test/Transforms/SampleProfile/Inputs/profile-symbol-list.ll
index b8e64e8..a48d46d 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/profile-symbol-list.ll
+++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-symbol-list.ll
@@ -90,10 +90,10 @@ while.end:                                        ; preds = %while.body
 ; Function Attrs: nofree nounwind
 declare dso_local i32 @printf(ptr nocapture readonly, ...) local_unnamed_addr #3
 
-attributes #0 = { noinline norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #2 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { noinline norecurse nounwind readnone uwtable "use-sample-profile" }
+attributes #1 = { norecurse nounwind readnone uwtable "use-sample-profile" }
+attributes #2 = { nofree norecurse nounwind uwtable "use-sample-profile" }
+attributes #3 = { nofree nounwind "use-sample-profile" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/Transforms/SampleProfile/branch.ll b/llvm/test/Transforms/SampleProfile/branch.ll
index ff5d8bb..83f9354 100644
--- a/llvm/test/Transforms/SampleProfile/branch.ll
+++ b/llvm/test/Transforms/SampleProfile/branch.ll
@@ -62,7 +62,7 @@ if.end:                                           ; preds = %entry
   %1 = load ptr, ptr %argv.addr, align 8, !dbg !30
   %arrayidx = getelementptr inbounds ptr, ptr %1, i64 1, !dbg !30
   %2 = load ptr, ptr %arrayidx, align 8, !dbg !30
-  %call = call i32 @atoi(ptr %2) #4, !dbg !31
+  %call = call i32 @atoi(ptr %2), !dbg !31
   store i32 %call, ptr %limit, align 4, !dbg !29
   %3 = load i32, ptr %limit, align 4, !dbg !32
   %cmp1 = icmp sgt i32 %3, 100, !dbg !34
@@ -75,7 +75,7 @@ if.then.2:                                        ; preds = %if.end
   %4 = load ptr, ptr %argv.addr, align 8, !dbg !39
   %arrayidx3 = getelementptr inbounds ptr, ptr %4, i64 2, !dbg !39
   %5 = load ptr, ptr %arrayidx3, align 8, !dbg !39
-  %call4 = call i32 @atoi(ptr %5) #4, !dbg !40
+  %call4 = call i32 @atoi(ptr %5), !dbg !40
   %conv = sitofp i32 %call4 to double, !dbg !40
   %mul = fmul double 0x40370ABE6A337A81, %conv, !dbg !41
   store double %mul, ptr %s, align 8, !dbg !38
@@ -128,7 +128,7 @@ if.else:                                          ; preds = %if.end
   %16 = load ptr, ptr %argv.addr, align 8, !dbg !72
   %arrayidx10 = getelementptr inbounds ptr, ptr %16, i64 2, !dbg !72
   %17 = load ptr, ptr %arrayidx10, align 8, !dbg !72
-  %call11 = call i32 @atoi(ptr %17) #4, !dbg !74
+  %call11 = call i32 @atoi(ptr %17), !dbg !74
   %conv12 = sitofp i32 %call11 to double, !dbg !74
   store double %conv12, ptr %result, align 8, !dbg !75
   br label %if.end.13
@@ -145,18 +145,14 @@ return:                                           ; preds = %if.end.13, %if.then
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
 
 ; Function Attrs: nounwind readonly
-declare i32 @atoi(ptr) #2
+declare i32 @atoi(ptr)
 
-declare i32 @printf(ptr, ...) #3
+declare i32 @printf(ptr, ...)
 
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { nounwind readonly }
+attributes #0 = { uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!13, !14}
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
index 077eab7..ade2d73 100644
--- a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
+++ b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
@@ -56,7 +56,7 @@ for.body:                                         ; preds = %for.body, %entry
 ; THRESHOLD-REPLAY: !{!"function_entry_count", i64 1, i64 446061515086924981, i64 3815895320998406042, i64 6309742469962978389, i64 7102633082150537521, i64 -2862076748587597320, i64 -2016976694713209516}
 ; THRESHOLD-REPLAY-NO-FUNCA: !{!"function_entry_count", i64 1, i64 446061515086924981, i64 3815895320998406042, i64 6309742469962978389, i64 7102633082150537521, i64 -2862076748587597320}
 
-attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nofree noinline norecurse nounwind uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!14, !15, !16}
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll
index fe31023..de51afd 100644
--- a/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll
+++ b/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll
@@ -88,8 +88,8 @@ entry:
 
 declare i32 @_Z3fibi(i32)
 
-attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nofree noinline norecurse nounwind uwtable "use-sample-profile" }
+attributes #1 = { nofree norecurse nounwind uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!14, !15, !16}
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
index 177329f..deabc27 100644
--- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
@@ -109,8 +109,8 @@ entry:
 
 declare i32 @_Z3fibi(i32)
 
-attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nofree noinline norecurse nounwind uwtable "use-sample-profile" }
+attributes #1 = { nofree norecurse nounwind uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!14, !15, !16}
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-summary.ll b/llvm/test/Transforms/SampleProfile/csspgo-summary.ll
index f18425e..3daa69e 100644
--- a/llvm/test/Transforms/SampleProfile/csspgo-summary.ll
+++ b/llvm/test/Transforms/SampleProfile/csspgo-summary.ll
@@ -75,8 +75,8 @@ entry:
 
 declare i32 @_Z3fibi(i32)
 
-attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nofree noinline norecurse nounwind uwtable "use-sample-profile" }
+attributes #1 = { nofree norecurse nounwind uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!14, !15, !16}
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll
index 030b5aa..e4ee939 100644
--- a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll
+++ b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll
@@ -87,8 +87,8 @@ entry:
 
 declare i32 @_Z3fibi(i32)
 
-attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nofree noinline norecurse nounwind uwtable "use-sample-profile" }
+attributes #1 = { nofree norecurse nounwind uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!14, !15, !16}
diff --git a/llvm/test/Transforms/SampleProfile/entry_counts_cold.ll b/llvm/test/Transforms/SampleProfile/entry_counts_cold.ll
index c7617c1..4aacde8 100644
--- a/llvm/test/Transforms/SampleProfile/entry_counts_cold.ll
+++ b/llvm/test/Transforms/SampleProfile/entry_counts_cold.ll
@@ -91,12 +91,11 @@ declare void @llvm.lifetime.start.p0(ptr nocapture) #2
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.lifetime.end.p0(ptr nocapture) #2
 
-declare void @baz(...) #3
+declare void @baz(...)
 
-attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nounwind ssp uwtable "use-sample-profile" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { argmemonly nounwind }
-attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #4 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/Transforms/SampleProfile/entry_counts_missing_dbginfo.ll b/llvm/test/Transforms/SampleProfile/entry_counts_missing_dbginfo.ll
index 0e62921..e0cd883 100644
--- a/llvm/test/Transforms/SampleProfile/entry_counts_missing_dbginfo.ll
+++ b/llvm/test/Transforms/SampleProfile/entry_counts_missing_dbginfo.ll
@@ -101,12 +101,11 @@ declare void @llvm.lifetime.start.p0(ptr nocapture) #2
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.lifetime.end.p0(ptr nocapture) #2
 
-declare void @baz(...) #3
+declare void @baz(...)
 
-attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nounwind ssp uwtable "use-sample-profile" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { argmemonly nounwind }
-attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #4 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/Transforms/SampleProfile/fsafdo_test.ll b/llvm/test/Transforms/SampleProfile/fsafdo_test.ll
index 4a35cb1..8cc5d06 100644
--- a/llvm/test/Transforms/SampleProfile/fsafdo_test.ll
+++ b/llvm/test/Transforms/SampleProfile/fsafdo_test.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
 @sum = dso_local local_unnamed_addr global i32 0, align 4
 
 declare i32 @bar(i32 %i) #0
-declare void @work(i32 %i) #2
+declare void @work(i32 %i)
 
 define dso_local void @foo() #0 !dbg !29 {
 ; CHECK: Printing analysis {{.*}} for function 'foo':
@@ -141,7 +141,7 @@ if.end9.3:
 ; CHECK: edge %if.end9.3 -> %for.cond1.preheader probability is 0x7f7cb227 / 0x80000000 = 99.60% [HOT edge]
 }
 
-define dso_local i32 @main() #3 !dbg !52 {
+define dso_local i32 @main() !dbg !52 {
 entry:
   br label %for.body, !dbg !53
 
@@ -157,10 +157,8 @@ for.end:
 }
 
 
-attributes #0 = { noinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile"}
+attributes #0 = { noinline nounwind uwtable "use-sample-profile"}
 attributes #1 = { argmemonly nounwind willreturn }
-attributes #2 = { nofree noinline norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/Transforms/SampleProfile/gcc-simple.ll b/llvm/test/Transforms/SampleProfile/gcc-simple.ll
index 315ac5a..2496d18 100644
--- a/llvm/test/Transforms/SampleProfile/gcc-simple.ll
+++ b/llvm/test/Transforms/SampleProfile/gcc-simple.ll
@@ -31,7 +31,7 @@ entry:
   %i.addr = alloca i64, align 8
   store i64 %i, ptr %i.addr, align 8
   call void @llvm.dbg.declare(metadata ptr %i.addr, metadata !16, metadata !17), !dbg !18
-  %call = call i32 @rand() #3, !dbg !19
+  %call = call i32 @rand(), !dbg !19
 ; CHECK: !prof ![[PROF1:[0-9]+]]
   %cmp = icmp slt i32 %call, 500, !dbg !21
   br i1 %cmp, label %if.then, label %if.else, !dbg !22
@@ -42,7 +42,7 @@ if.then:                                          ; preds = %entry
   br label %return, !dbg !23
 
 if.else:                                          ; preds = %entry
-  %call1 = call i32 @rand() #3, !dbg !25
+  %call1 = call i32 @rand(), !dbg !25
 ; CHECK: !prof ![[PROF3:[0-9]+]]
   %cmp2 = icmp sgt i32 %call1, 5000, !dbg !28
   br i1 %cmp2, label %if.then.3, label %if.else.4, !dbg !29
@@ -62,10 +62,10 @@ return:                                           ; preds = %if.else.4, %if.then
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
 
 ; Function Attrs: nounwind
-declare i32 @rand() #2
+declare i32 @rand()
 
 ; Function Attrs: nounwind uwtable
 define i32 @main() #0 !dbg !9 {
@@ -141,10 +141,7 @@ for.end.6:                                        ; preds = %for.cond
 ; CHECK: ![[PROF7]] = !{!"branch_weights", i32 18943, i32 1}
 ; CHECK: ![[PROF8]] = !{!"branch_weights", i32 18942}
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind }
+attributes #0 = { nounwind uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!13, !14}
diff --git a/llvm/test/Transforms/SampleProfile/inline-act.ll b/llvm/test/Transforms/SampleProfile/inline-act.ll
index 3ab3efd..e962642 100644
--- a/llvm/test/Transforms/SampleProfile/inline-act.ll
+++ b/llvm/test/Transforms/SampleProfile/inline-act.ll
@@ -24,7 +24,7 @@ target triple = "x86_64-unknown-linux-gnu"
 @t = global i32 0, align 4
 
 ; Function Attrs: nounwind uwtable
-define zeroext i1 @_Z3fooi(i32) #0 {
+define zeroext i1 @_Z3fooi(i32) {
   %switch.tableidx = sub i32 %0, 0
   %2 = icmp ult i32 %switch.tableidx, 4
   br i1 %2, label %switch.lookup, label %3
@@ -41,7 +41,7 @@ switch.lookup:                                    ; preds = %1
 }
 
 ; Function Attrs: nounwind uwtable
-define void @_Z3bari(i32) #0 !dbg !9 {
+define void @_Z3bari(i32) !dbg !9 {
   %2 = call zeroext i1 @_Z3fooi(i32 %0), !dbg !10
   br i1 %2, label %3, label %6, !dbg !10
 
@@ -55,8 +55,6 @@ define void @_Z3bari(i32) #0 !dbg !9 {
   ret void
 }
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3}
 !llvm.ident = !{!4}
diff --git a/llvm/test/Transforms/SampleProfile/misexpect.ll b/llvm/test/Transforms/SampleProfile/misexpect.ll
index 26f76ee5..a7667fc 100644
--- a/llvm/test/Transforms/SampleProfile/misexpect.ll
+++ b/llvm/test/Transforms/SampleProfile/misexpect.ll
@@ -68,7 +68,7 @@ if.end:                                           ; preds = %entry
   %1 = load ptr, ptr %argv.addr, align 8, !dbg !30
   %arrayidx = getelementptr inbounds ptr, ptr %1, i64 1, !dbg !30
   %2 = load ptr, ptr %arrayidx, align 8, !dbg !30
-  %call = call i32 @atoi(ptr %2) #4, !dbg !31
+  %call = call i32 @atoi(ptr %2), !dbg !31
   store i32 %call, ptr %limit, align 4, !dbg !29
   %3 = load i32, ptr %limit, align 4, !dbg !32
   %exp = call i32 @llvm.expect.i32(i32 %3, i32 0)
@@ -80,7 +80,7 @@ if.then.2:                                        ; preds = %if.end
   %4 = load ptr, ptr %argv.addr, align 8, !dbg !39
   %arrayidx3 = getelementptr inbounds ptr, ptr %4, i64 2, !dbg !39
   %5 = load ptr, ptr %arrayidx3, align 8, !dbg !39
-  %call4 = call i32 @atoi(ptr %5) #4, !dbg !40
+  %call4 = call i32 @atoi(ptr %5), !dbg !40
   %conv = sitofp i32 %call4 to double, !dbg !40
   %mul = fmul double 0x40370ABE6A337A81, %conv, !dbg !41
   store double %mul, ptr %s, align 8, !dbg !38
@@ -130,7 +130,7 @@ if.else:                                          ; preds = %if.end
   %16 = load ptr, ptr %argv.addr, align 8, !dbg !72
   %arrayidx10 = getelementptr inbounds ptr, ptr %16, i64 2, !dbg !72
   %17 = load ptr, ptr %arrayidx10, align 8, !dbg !72
-  %call11 = call i32 @atoi(ptr %17) #4, !dbg !74
+  %call11 = call i32 @atoi(ptr %17), !dbg !74
   %conv12 = sitofp i32 %call11 to double, !dbg !74
   store double %conv12, ptr %result, align 8, !dbg !75
   br label %if.end.13
@@ -147,23 +147,17 @@ return:                                           ; preds = %if.end.13, %if.then
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
 
 ; Function Attrs: nounwind readonly
-declare i32 @atoi(ptr) #2
+declare i32 @atoi(ptr)
 
-declare i32 @printf(ptr, ...) #3
+declare i32 @printf(ptr, ...)
 
 ; Function Attrs: nounwind readnone willreturn
-declare i32 @llvm.expect.i32(i32, i32) #5
+declare i32 @llvm.expect.i32(i32, i32)
 
-
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { nounwind readonly }
-attributes #5 = { nounwind readnone willreturn }
+attributes #0 = { uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!13, !14}
diff --git a/llvm/test/Transforms/SampleProfile/norepeated-icp-2.ll b/llvm/test/Transforms/SampleProfile/norepeated-icp-2.ll
index 30a5198..5ce8670 100644
--- a/llvm/test/Transforms/SampleProfile/norepeated-icp-2.ll
+++ b/llvm/test/Transforms/SampleProfile/norepeated-icp-2.ll
@@ -17,7 +17,7 @@ entry:
 }
 
 ; Function Attrs: nofree nounwind
-declare dso_local noundef i32 @printf(ptr nocapture noundef readonly, ...) #1
+declare dso_local noundef i32 @printf(ptr nocapture noundef readonly, ...)
 
 ; Function Attrs: uwtable mustprogress
 define dso_local void @_Z3hoov() #0 !dbg !11 {
@@ -35,7 +35,7 @@ if.end:                                           ; preds = %if.then, %entry
   ret void, !dbg !22
 }
 
-declare !dbg !23 dso_local void @_Z10hoo_calleev() #2
+declare !dbg !23 dso_local void @_Z10hoo_calleev()
 
 ; MAX2-LABEL: @_Z3goov(
 ; MAX2: icmp eq ptr {{.*}} @_Z3hoov
@@ -78,12 +78,9 @@ entry:
 ; MAX4: ![[PROF_ID5]] = !{!"VP", i32 0, i64 13000, i64 4128940972712279918, i64 -1, i64 3137940972712279918, i64 -1, i64 2132940972712279918, i64 -1, i64 1850239051784516332, i64 -1}
 
 ; Function Attrs: nofree nounwind
-declare noundef i32 @puts(ptr nocapture noundef readonly) #3
+declare noundef i32 @puts(ptr nocapture noundef readonly)
 
-attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
-attributes #1 = { nofree nounwind "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nofree nounwind }
+attributes #0 = { uwtable mustprogress "use-sample-profile" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/Transforms/SampleProfile/norepeated-icp-3.ll b/llvm/test/Transforms/SampleProfile/norepeated-icp-3.ll
index 9b9bbca4..c5645cd 100644
--- a/llvm/test/Transforms/SampleProfile/norepeated-icp-3.ll
+++ b/llvm/test/Transforms/SampleProfile/norepeated-icp-3.ll
@@ -34,11 +34,10 @@ entry:
 }
 
 ; Function Attrs: nofree nounwind
-declare noundef i32 @puts(ptr nocapture noundef readonly) #2
+declare noundef i32 @puts(ptr nocapture noundef readonly) #1
 
-attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
-attributes #1 = { nofree nounwind "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nofree nounwind }
+attributes #0 = { uwtable mustprogress "use-sample-profile" }
+attributes #1 = { nofree nounwind }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/Transforms/SampleProfile/norepeated-icp-4.ll b/llvm/test/Transforms/SampleProfile/norepeated-icp-4.ll
index 57a2386..c418372 100644
--- a/llvm/test/Transforms/SampleProfile/norepeated-icp-4.ll
+++ b/llvm/test/Transforms/SampleProfile/norepeated-icp-4.ll
@@ -33,7 +33,7 @@ entry:
   ret void, !dbg !21
 }
 
-attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
+attributes #0 = { uwtable mustprogress "use-sample-profile" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5, !25}
diff --git a/llvm/test/Transforms/SampleProfile/norepeated-icp.ll b/llvm/test/Transforms/SampleProfile/norepeated-icp.ll
index f3340ba..bf3cfe4 100644
--- a/llvm/test/Transforms/SampleProfile/norepeated-icp.ll
+++ b/llvm/test/Transforms/SampleProfile/norepeated-icp.ll
@@ -15,7 +15,7 @@ entry:
 }
 
 ; Function Attrs: nofree nounwind
-declare dso_local noundef i32 @printf(ptr nocapture noundef readonly, ...) #1
+declare dso_local noundef i32 @printf(ptr nocapture noundef readonly, ...)
 
 ; Function Attrs: uwtable mustprogress
 define dso_local void @_Z3goov() #0 !dbg !11 {
@@ -40,11 +40,9 @@ entry:
 }
 
 ; Function Attrs: nofree nounwind
-declare noundef i32 @puts(ptr nocapture noundef readonly) #2
+declare noundef i32 @puts(ptr nocapture noundef readonly)
 
-attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
-attributes #1 = { nofree nounwind "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nofree nounwind }
+attributes #0 = { uwtable mustprogress "use-sample-profile" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/Transforms/SampleProfile/offset.ll b/llvm/test/Transforms/SampleProfile/offset.ll
index 0fdeb08..0f0187a 100644
--- a/llvm/test/Transforms/SampleProfile/offset.ll
+++ b/llvm/test/Transforms/SampleProfile/offset.ll
@@ -47,7 +47,7 @@ return:                                           ; preds = %if.else, %if.then
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nounwind uwtable "use-sample-profile" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/Transforms/SampleProfile/profile-context-order.ll b/llvm/test/Transforms/SampleProfile/profile-context-order.ll
index db368bc..eddbb0b 100644
--- a/llvm/test/Transforms/SampleProfile/profile-context-order.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-context-order.ll
@@ -118,8 +118,8 @@ entry:
 
 declare i32 @_Z3foo(i32)
 
-attributes #0 = { nofree noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { nofree nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nofree noinline nounwind uwtable "use-sample-profile" }
+attributes #1 = { nofree nounwind uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!14, !15, !16}
diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll
index bb0abb1..aad4e38 100644
--- a/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll
@@ -155,8 +155,8 @@ entry:
 
 declare i32 @_Z3fibi(i32)
 
-attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nofree noinline norecurse nounwind uwtable "use-sample-profile" }
+attributes #1 = { nofree norecurse nounwind uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!14, !15, !16}
diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
index 9d2ac2f..a1bf359 100644
--- a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
@@ -152,8 +152,8 @@ entry:
 
 declare i32 @_Z3fibi(i32)
 
-attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nofree noinline norecurse nounwind uwtable "use-sample-profile" }
+attributes #1 = { nofree norecurse nounwind uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!14, !15, !16}
diff --git a/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll b/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll
index f85ab24..3cb8cd1 100644
--- a/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll
@@ -99,8 +99,8 @@ entry:
 
 declare i32 @_Z3foo(i32)
 
-attributes #0 = { nofree noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { nofree nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nofree noinline nounwind uwtable "use-sample-profile" }
+attributes #1 = { nofree nounwind uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!14, !15, !16}
diff --git a/llvm/test/Transforms/SampleProfile/propagate.ll b/llvm/test/Transforms/SampleProfile/propagate.ll
index ece85a8..3a07152 100644
--- a/llvm/test/Transforms/SampleProfile/propagate.ll
+++ b/llvm/test/Transforms/SampleProfile/propagate.ll
@@ -123,7 +123,6 @@ for.cond8:                                        ; preds = %for.inc, %if.else7
 ; CHECK: edge %for.cond8 -> %for.body10 probability is 0x7e941a89 / 0x80000000 = 98.89% [HOT edge]
 ; CHECK: edge %for.cond8 -> %for.end probability is 0x016be577 / 0x80000000 = 1.11%
 
-
 for.body10:                                       ; preds = %for.cond8
   %14 = load i64, ptr %j, align 8, !dbg !69
   %15 = load i32, ptr %x.addr, align 4, !dbg !71
@@ -171,7 +170,7 @@ return:                                           ; preds = %if.end20, %if.then
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
 
 ; Function Attrs: norecurse uwtable
 define i32 @main() #2 !dbg !86 {
@@ -198,12 +197,10 @@ entry:
   ret i32 0, !dbg !104
 }
 
-declare i32 @printf(ptr, ...) #3
+declare i32 @printf(ptr, ...)
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { norecurse uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
-attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "use-sample-profile" }
+attributes #2 = { norecurse uwtable "use-sample-profile" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4}
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
index 26ae198..7d6fd1b 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
@@ -2,11 +2,10 @@
 ; RUN: opt < %s -passes='default<O2>' -debug-info-for-profiling -pseudo-probe-for-profiling -S | FileCheck %s --check-prefix=PROBE
 ; RUN: opt < %s -passes='thinlto-pre-link<O2>' -debug-info-for-profiling -pseudo-probe-for-profiling -S | FileCheck %s --check-prefix=PROBE
 
-
 @a = dso_local global i32 0, align 4
 
 ; Function Attrs: uwtable
-define void @_Z3foov(i32 %x) #0 !dbg !4 {
+define void @_Z3foov(i32 %x) !dbg !4 {
 bb0:
   %cmp = icmp eq i32 %x, 0, !dbg !10
   br i1 %cmp, label %bb1, label %bb2
@@ -30,13 +29,10 @@ bb3:
   ret void, !dbg !12
 }
 
-declare void @_Z3barv() #1
+declare void @_Z3barv()
 declare void @llvm.lifetime.start.p0(ptr nocapture) nounwind argmemonly
 declare void @llvm.lifetime.end.p0(ptr nocapture) nounwind argmemonly
 
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
@@ -62,7 +58,6 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "fra
 ; DEBUG: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]])
 ; DEBUG: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)
 
-
 ; PROBE: ![[CALL1]] = !DILocation(line: 4, column: 3, scope: ![[CALL1BLOCK:[0-9]+]])
 ; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 455147551)
 ; PROBE: ![[CALL2]] = !DILocation(line: 4, column: 9, scope: ![[CALL2BLOCK:[0-9]+]])
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-icp-factor.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-icp-factor.ll
index 383289e..92f04d5 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-icp-factor.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-icp-factor.ll
@@ -135,7 +135,7 @@ declare dso_local i32 @printf(ptr, ...)
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
 declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "use-sample-profile" }
 attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 attributes #2 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
 attributes #3 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-no-debug-info.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-no-debug-info.ll
index e45ddb1..08b7e4c 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-no-debug-info.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-no-debug-info.ll
@@ -1,6 +1,5 @@
 ; RUN: opt < %s -passes='pseudo-probe,cgscc(inline)' -S | FileCheck %s
 
-
 ; CHECK-LABEL: @caller(
 
 ; This instruction did not have a !dbg metadata in the callee but get a !dbg after inlined.
@@ -10,26 +9,23 @@
 ; CHECK-NOT:  call void @llvm.pseudoprobe({{.*}}), !dbg ![[#]]
 ; CHECK:  call void @llvm.pseudoprobe({{.*}})
 
-
 @a = common global i32 0, align 4
 @b = common global i32 0, align 4
 
 ; Function Attrs: nounwind uwtable
-define void @callee() #0 {
+define void @callee() {
 entry:
   store i32 1, ptr @a, align 4
   ret void
 }
 
 ; Function Attrs: nounwind uwtable
-define void @caller() #0 !dbg !4 {
+define void @caller() !dbg !4 {
 entry:
   tail call void @callee(), !dbg !12
   ret void, !dbg !12
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll
index 3cb91b7..decf4b1 100644
--- a/llvm/test/Transforms/SampleProfile/remarks.ll
+++ b/llvm/test/Transforms/SampleProfile/remarks.ll
@@ -202,10 +202,10 @@ entry:
   ret i32 %conv, !dbg !58
 }
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #0 = { nounwind uwtable "use-sample-profile" }
 attributes #1 = { nounwind argmemonly }
 attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind alwaysinline "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind alwaysinline }
 attributes #4 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/Transforms/SampleProfile/uniqname.ll b/llvm/test/Transforms/SampleProfile/uniqname.ll
index 23c3ac9..67988c7 100644
--- a/llvm/test/Transforms/SampleProfile/uniqname.ll
+++ b/llvm/test/Transforms/SampleProfile/uniqname.ll
@@ -65,9 +65,9 @@ if.end:                                           ; preds = %if.then, %entry
   ret void, !dbg !31
 }
 
-declare !dbg !32 dso_local void @_Z10hoo_calleev() #3
+declare !dbg !32 dso_local void @_Z10hoo_calleev()
 
-declare !dbg !33 dso_local void @_Z10moo_calleev() #3
+declare !dbg !33 dso_local void @_Z10moo_calleev()
 
 ; Function Attrs: uwtable mustprogress
 define internal void @_ZL3noov.__uniq.334154460836426447066042049082945760258() #1 !dbg !34 {
@@ -84,12 +84,11 @@ if.end:                                           ; preds = %if.then, %entry
   ret void, !dbg !37
 }
 
-declare !dbg !38 dso_local void @_Z10noo_calleev() #3
+declare !dbg !38 dso_local void @_Z10noo_calleev()
 
-attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
-attributes #1 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "sample-profile-suffix-elision-policy"="selected" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
-attributes #2 = { noinline uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "sample-profile-suffix-elision-policy"="selected" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
-attributes #3 = { "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable mustprogress "use-sample-profile" }
+attributes #1 = { uwtable mustprogress "use-sample-profile" "sample-profile-suffix-elision-policy"="selected" }
+attributes #2 = { noinline uwtable mustprogress "use-sample-profile" "sample-profile-suffix-elision-policy"="selected" }
 
 ; CHECK: ![[PROF_ID1]] = !{!"branch_weights", i32 5931}
 ; CHECK: ![[PROF_ID2]] = !{!"branch_weights", i32 2000}
diff --git a/llvm/test/Transforms/Scalarizer/dbginfo.ll b/llvm/test/Transforms/Scalarizer/dbginfo.ll
index 464e752..d6b8aa2 100644
--- a/llvm/test/Transforms/Scalarizer/dbginfo.ll
+++ b/llvm/test/Transforms/Scalarizer/dbginfo.ll
@@ -2,7 +2,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 ; Function Attrs: nounwind uwtable
-define void @f1(ptr nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c) #0 !dbg !4 {
+define void @f1(ptr nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c) !dbg !4 {
 ; CHECK: @f1(
 ; CHECK: %a.i1 = getelementptr i32, ptr %a, i32 1
 ; CHECK: %a.i2 = getelementptr i32, ptr %a, i32 2
@@ -45,10 +45,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
+declare void @llvm.dbg.value(metadata, metadata, metadata)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!18, !26}
diff --git a/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
index ccc8def..7aa6ced 100644
--- a/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
@@ -7,7 +7,7 @@ target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i
 target triple = "hexagon-unknown--elf"
 
 ; Function Attrs: noinline nounwind
-define i32 @foo(i32 %x) #0 section ".tcm_text" {
+define i32 @foo(i32 %x) section ".tcm_text" {
 ; ENABLE-LABEL: @foo(
 ; ENABLE-NEXT:  entry:
 ; ENABLE-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 6
@@ -92,5 +92,3 @@ return:                                           ; preds = %sw.default, %sw.bb5
   %1 = load i32, ptr %retval, align 4
   ret i32 %1
 }
-
-attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/merge-cleanuppads.ll b/llvm/test/Transforms/SimplifyCFG/X86/merge-cleanuppads.ll
index 6e6c97f..2d21a59 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/merge-cleanuppads.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/merge-cleanuppads.ll
@@ -3,37 +3,33 @@ target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc18.0.0"
 
 ; Function Attrs: uwtable
-define void @test1() #0 personality ptr @__CxxFrameHandler3 {
+define void @test1() personality ptr @__CxxFrameHandler3 {
 entry:
   invoke void @may_throw(i32 3)
           to label %invoke.cont unwind label %ehcleanup
 
 invoke.cont:                                      ; preds = %entry
-  tail call void @may_throw(i32 2) #2
-  tail call void @may_throw(i32 1) #2
+  tail call void @may_throw(i32 2)
+  tail call void @may_throw(i32 1)
   ret void
 
 ehcleanup:                                        ; preds = %entry
   %cp = cleanuppad within none []
-  tail call void @may_throw(i32 2) #2 [ "funclet"(token %cp) ]
+  tail call void @may_throw(i32 2) [ "funclet"(token %cp) ]
   cleanupret from %cp unwind label %ehcleanup2
 
 ehcleanup2:
   %cp2 = cleanuppad within none []
-  tail call void @may_throw(i32 1) #2 [ "funclet"(token %cp2) ]
+  tail call void @may_throw(i32 1) [ "funclet"(token %cp2) ]
   cleanupret from %cp2 unwind to caller
 }
 
 ; CHECK-LABEL: define void @test1(
 ; CHECK: %[[cp:.*]] = cleanuppad within none []
-; CHECK: tail call void @may_throw(i32 2) #2 [ "funclet"(token %[[cp]]) ]
-; CHECK: tail call void @may_throw(i32 1) #2 [ "funclet"(token %[[cp]]) ]
+; CHECK: tail call void @may_throw(i32 2) [ "funclet"(token %[[cp]]) ]
+; CHECK: tail call void @may_throw(i32 1) [ "funclet"(token %[[cp]]) ]
 ; CHECK: cleanupret from %[[cp]] unwind to caller
 
-declare void @may_throw(i32) #1
+declare void @may_throw(i32)
 
 declare i32 @__CxxFrameHandler3(...)
-
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind }
diff --git a/llvm/test/Transforms/SimplifyCFG/pr50060-constantfold-loopid.ll b/llvm/test/Transforms/SimplifyCFG/pr50060-constantfold-loopid.ll
index 19e1c73..0363792 100644
--- a/llvm/test/Transforms/SimplifyCFG/pr50060-constantfold-loopid.ll
+++ b/llvm/test/Transforms/SimplifyCFG/pr50060-constantfold-loopid.ll
@@ -13,7 +13,7 @@
 @C = dso_local global i32 0, align 4
 
 ; Function Attrs: nounwind
-define dso_local void @_Z6test01v() addrspace(1) #0 {
+define dso_local void @_Z6test01v() addrspace(1) {
 ; CHECK-LABEL: @_Z6test01v(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
@@ -22,7 +22,7 @@ define dso_local void @_Z6test01v() addrspace(1) #0 {
 ; CHECK:       do.body:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @C, align 4, !tbaa [[TBAA2:![0-9]+]]
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; CHECK-NEXT:    call addrspace(1) void @llvm.lifetime.start.p0(ptr [[J]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    call addrspace(1) void @llvm.lifetime.start.p0(ptr [[J]])
 ; CHECK-NEXT:    store i32 0, ptr [[J]], align 4, !tbaa [[TBAA2]]
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
@@ -30,11 +30,11 @@ define dso_local void @_Z6test01v() addrspace(1) #0 {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP1]], 3
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    call addrspace(1) void @llvm.lifetime.end.p0(ptr [[J]]) #[[ATTR2]]
+; CHECK-NEXT:    call addrspace(1) void @llvm.lifetime.end.p0(ptr [[J]])
 ; CHECK-NEXT:    br label [[DO_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    store i32 undef, ptr [[I]], align 4
-; CHECK-NEXT:    call addrspace(1) void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR2]]
+; CHECK-NEXT:    call addrspace(1) void @llvm.lifetime.start.p0(ptr [[I]])
 ; CHECK-NEXT:    store i32 0, ptr [[I]], align 4, !tbaa [[TBAA2]]
 ; CHECK-NEXT:    br label [[FOR_COND1:%.*]]
 ; CHECK:       for.cond1:
@@ -43,7 +43,7 @@ define dso_local void @_Z6test01v() addrspace(1) #0 {
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_BODY4:%.*]], label [[FOR_COND_CLEANUP3:%.*]]
 ; CHECK:       for.cond.cleanup3:
-; CHECK-NEXT:    call addrspace(1) void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR2]]
+; CHECK-NEXT:    call addrspace(1) void @llvm.lifetime.end.p0(ptr [[I]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[J]], align 4, !tbaa [[TBAA2]]
 ; CHECK-NEXT:    [[INC7:%.*]] = add nsw i32 [[TMP4]], 1
 ; CHECK-NEXT:    store i32 [[INC7]], ptr [[J]], align 4, !tbaa [[TBAA2]]
@@ -64,7 +64,7 @@ entry:
 do.body:                                          ; preds = %do.cond, %entry
   %0 = load i32, ptr @C, align 4, !tbaa !2
   %inc = add nsw i32 %0, 1
-  call addrspace(1) void @llvm.lifetime.start.p0(ptr %j) #2
+  call addrspace(1) void @llvm.lifetime.start.p0(ptr %j)
   store i32 0, ptr %j, align 4, !tbaa !2
   br label %for.cond
 
@@ -74,12 +74,12 @@ for.cond:                                         ; preds = %for.inc6, %do.body
   br i1 %cmp, label %for.body, label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %for.cond
-  call addrspace(1) void @llvm.lifetime.end.p0(ptr %j) #2
+  call addrspace(1) void @llvm.lifetime.end.p0(ptr %j)
   br label %for.end8
 
 for.body:                                         ; preds = %for.cond
   store i32 undef, ptr %i, align 4
-  call addrspace(1) void @llvm.lifetime.start.p0(ptr %i) #2
+  call addrspace(1) void @llvm.lifetime.start.p0(ptr %i)
   store i32 0, ptr %i, align 4, !tbaa !2
   br label %for.cond1
 
@@ -90,7 +90,7 @@ for.cond1:                                        ; preds = %for.inc, %for.body
   br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
 
 for.cond.cleanup3:                                ; preds = %for.cond1
-  call addrspace(1) void @llvm.lifetime.end.p0(ptr %i) #2
+  call addrspace(1) void @llvm.lifetime.end.p0(ptr %i)
   br label %for.end
 
 for.body4:                                        ; preds = %for.cond1
@@ -124,14 +124,10 @@ do.end:                                           ; preds = %do.cond
 }
 
 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0(ptr nocapture) addrspace(1) #1
+declare void @llvm.lifetime.start.p0(ptr nocapture) addrspace(1)
 
 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0(ptr nocapture) addrspace(1) #1
-
-attributes #0 = { nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { argmemonly nofree nosync nounwind willreturn }
-attributes #2 = { nounwind }
+declare void @llvm.lifetime.end.p0(ptr nocapture) addrspace(1)
 
 !llvm.module.flags = !{!0}
 !llvm.ident = !{!1}
diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/pr23975.ll b/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/pr23975.ll
index b3cbc3d..0d3846d 100644
--- a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/pr23975.ll
+++ b/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/pr23975.ll
@@ -7,7 +7,7 @@ target triple = "amdgcn--"
 %struct.Matrix4x4 = type { [4 x [4 x float]] }
 
 ; Function Attrs: nounwind
-define fastcc void @Accelerator_Intersect(ptr addrspace(1) nocapture readonly %leafTransformations) #0 {
+define fastcc void @Accelerator_Intersect(ptr addrspace(1) nocapture readonly %leafTransformations) {
 ; CHECK-LABEL:  @Accelerator_Intersect(
 entry:
   %tmp = sext i32 undef to i64
@@ -17,5 +17,3 @@ entry:
   %tmp2 = load <4 x float>, ptr addrspace(1) undef, align 4
   ret void
 }
-
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "target-cpu"="tahiti" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/StructurizeCFG/nested-loop-order.ll b/llvm/test/Transforms/StructurizeCFG/nested-loop-order.ll
index 91e9511..11753cf 100644
--- a/llvm/test/Transforms/StructurizeCFG/nested-loop-order.ll
+++ b/llvm/test/Transforms/StructurizeCFG/nested-loop-order.ll
@@ -63,7 +63,3 @@ ENDIF28:                                          ; preds = %ENDIF
   %tmp36 = icmp sgt i32 %tmp20, 2
   br i1 %tmp36, label %ENDLOOP, label %LOOP.outer
 }
-
-attributes #0 = { "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { readnone }
diff --git a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
index d1c0f64..113b052 100644
--- a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
+++ b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
@@ -638,7 +638,7 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
 !0 = !{!1, !3, !5, !7, !9, !11}
 !1 = !{!2, !"cold"}
 !2 = !{i64 1, i64 2, i64 3}
-!3 = !{!4, !"cold"}
+!3 = !{!4, !"cold", !13}
 !4 = !{i64 1, i64 2, i64 4}
 !5 = !{!6, !"notcold"}
 !6 = !{i64 1, i64 5, i64 6}
@@ -648,6 +648,7 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
 !10 = !{i64 1, i64 8, i64 9}
 !11 = !{!12, !"hot"}
 !12 = !{i64 1, i64 8, i64 10}  
+!13 = !{i64 123, i64 456}
 )IR");
 
   Function *Func = M->getFunction("test");
@@ -683,10 +684,25 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
     auto *StackId = mdconst::dyn_extract<ConstantInt>(StackMD->getOperand(0));
     EXPECT_EQ(StackId->getZExtValue(), 1u);
     StackId = mdconst::dyn_extract<ConstantInt>(StackMD->getOperand(1));
-    if (StackId->getZExtValue() == 2u)
+    if (StackId->getZExtValue() == 2u) {
       EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Cold);
-    else if (StackId->getZExtValue() == 5u)
+      // We should propagate the single context size info from the second cold
+      // context above onto the new merged/trimmed context.
+      ASSERT_EQ(MIB->getNumOperands(), 3u);
+      MDNode *ContextSizePair = dyn_cast<MDNode>(MIB->getOperand(2));
+      assert(ContextSizePair->getNumOperands() == 2);
+      EXPECT_EQ(
+          mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(0))
+              ->getZExtValue(),
+          123u);
+      EXPECT_EQ(
+          mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(1))
+              ->getZExtValue(),
+          456u);
+    } else if (StackId->getZExtValue() == 5u) {
       EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold);
+      ASSERT_EQ(MIB->getNumOperands(), 2u);
+    }
   }
 }
 
diff --git a/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt b/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt
index 7e3ebc8..7b563d7 100644
--- a/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt
+++ b/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt
@@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS
   IRReader
   JITLink
   Object
+  ObjectYAML
   OrcDebugging
   OrcJIT
   OrcShared
@@ -25,6 +26,7 @@ add_llvm_unittest(OrcJITTests
   IndirectionUtilsTest.cpp
   JITTargetMachineBuilderTest.cpp
   LazyCallThroughAndReexportsTest.cpp
+  LibraryResolverTest.cpp
   LookupAndRecordAddrsTest.cpp
   MachOPlatformTest.cpp
   MapperJITLinkMemoryManagerTest.cpp
diff --git a/llvm/unittests/ExecutionEngine/Orc/Inputs/A/A_linux.yaml b/llvm/unittests/ExecutionEngine/Orc/Inputs/A/A_linux.yaml
new file mode 100644
index 0000000..afd1d9e
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/Orc/Inputs/A/A_linux.yaml
@@ -0,0 +1,460 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_DYN
+  Machine:         EM_X86_64
+ProgramHeaders:
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .rela.plt
+    Align:           0x1000
+    Offset:          0x0
+  - Type:            PT_LOAD
+    Flags:           [ PF_X, PF_R ]
+    FirstSec:        .init
+    LastSec:         .fini
+    VAddr:           0x1000
+    Align:           0x1000
+    Offset:          0x1000
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .rodata
+    LastSec:         .eh_frame
+    VAddr:           0x2000
+    Align:           0x1000
+    Offset:          0x2000
+  - Type:            PT_LOAD
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .bss
+    VAddr:           0x3E10
+    Align:           0x1000
+    Offset:          0x2E10
+  - Type:            PT_DYNAMIC
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .dynamic
+    LastSec:         .dynamic
+    VAddr:           0x3E20
+    Align:           0x8
+    Offset:          0x2E20
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .note.gnu.property
+    VAddr:           0x2A8
+    Align:           0x8
+    Offset:          0x2A8
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.build-id
+    LastSec:         .note.gnu.build-id
+    VAddr:           0x2C8
+    Align:           0x4
+    Offset:          0x2C8
+  - Type:            PT_GNU_PROPERTY
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .note.gnu.property
+    VAddr:           0x2A8
+    Align:           0x8
+    Offset:          0x2A8
+  - Type:            PT_GNU_EH_FRAME
+    Flags:           [ PF_R ]
+    FirstSec:        .eh_frame_hdr
+    LastSec:         .eh_frame_hdr
+    VAddr:           0x2010
+    Align:           0x4
+    Offset:          0x2010
+  - Type:            PT_GNU_STACK
+    Flags:           [ PF_W, PF_R ]
+    Align:           0x10
+    Offset:          0x0
+  - Type:            PT_GNU_RELRO
+    Flags:           [ PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .got
+    VAddr:           0x3E10
+    Offset:          0x2E10
+Sections:
+  - Name:            .note.gnu.property
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2A8
+    AddressAlign:    0x8
+    Notes:
+      - Name:            GNU
+        Desc:            020000C0040000000300000000000000
+        Type:            NT_GNU_PROPERTY_TYPE_0
+  - Name:            .note.gnu.build-id
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2C8
+    AddressAlign:    0x4
+    Notes:
+      - Name:            GNU
+        Desc:            73604396C95840D5C380A0950F085A778F94EE7C
+        Type:            NT_PRPSINFO
+  - Name:            .gnu.hash
+    Type:            SHT_GNU_HASH
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2F0
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Header:
+      SymNdx:          0x6
+      Shift2:          0x6
+    BloomFilter:     [ 0x400000080000 ]
+    HashBuckets:     [ 0x0, 0x6 ]
+    HashValues:      [ 0x7C9DCB93 ]
+  - Name:            .dynsym
+    Type:            SHT_DYNSYM
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x318
+    Link:            .dynstr
+    AddressAlign:    0x8
+  - Name:            .dynstr
+    Type:            SHT_STRTAB
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x3C0
+    AddressAlign:    0x1
+  - Name:            .gnu.version
+    Type:            SHT_GNU_versym
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x436
+    Link:            .dynsym
+    AddressAlign:    0x2
+    Entries:         [ 0, 1, 2, 1, 1, 2, 1 ]
+  - Name:            .gnu.version_r
+    Type:            SHT_GNU_verneed
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x448
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Dependencies:
+      - Version:         1
+        File:            libc.so.6
+        Entries:
+          - Name:            GLIBC_2.2.5
+            Hash:            157882997
+            Flags:           0
+            Other:           2
+  - Name:            .rela.dyn
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x468
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Relocations:
+      - Offset:          0x3E10
+        Type:            R_X86_64_RELATIVE
+        Addend:          4368
+      - Offset:          0x3E18
+        Type:            R_X86_64_RELATIVE
+        Addend:          4304
+      - Offset:          0x4020
+        Type:            R_X86_64_RELATIVE
+        Addend:          16416
+      - Offset:          0x3FE0
+        Symbol:          _ITM_deregisterTMCloneTable
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FE8
+        Symbol:          __gmon_start__
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FF0
+        Symbol:          _ITM_registerTMCloneTable
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FF8
+        Symbol:          __cxa_finalize
+        Type:            R_X86_64_GLOB_DAT
+  - Name:            .rela.plt
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC, SHF_INFO_LINK ]
+    Address:         0x510
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Info:            .got.plt
+    Relocations:
+      - Offset:          0x4018
+        Symbol:          puts
+        Type:            R_X86_64_JUMP_SLOT
+  - Name:            .init
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1000
+    AddressAlign:    0x4
+    Offset:          0x1000
+    Content:         F30F1EFA4883EC08488B05D92F00004885C07402FFD04883C408C3
+  - Name:            .plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1020
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         FF35E22F0000F2FF25E32F00000F1F00F30F1EFA6800000000F2E9E1FFFFFF90
+  - Name:            .plt.got
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1040
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         F30F1EFAF2FF25AD2F00000F1F440000
+  - Name:            .plt.sec
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1050
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         F30F1EFAF2FF25BD2F00000F1F440000
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1060
+    AddressAlign:    0x10
+    Content:         488D3DC12F0000488D05BA2F00004839F87415488B05662F00004885C07409FFE00F1F8000000000C30F1F8000000000488D3D912F0000488D358A2F00004829FE4889F048C1EE3F48C1F8034801C648D1FE7414488B05352F00004885C07408FFE0660F1F440000C30F1F8000000000F30F1EFA803D4D2F000000752B5548833D122F0000004889E5740C488B3D2E2F0000E849FFFFFFE864FFFFFFC605252F0000015DC30F1F00C30F1F8000000000F30F1EFAE977FFFFFFF30F1EFA554889E5488D05D80E00004889C7E820FFFFFF905DC3
+  - Name:            .fini
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1134
+    AddressAlign:    0x4
+    Content:         F30F1EFA4883EC084883C408C3
+  - Name:            .rodata
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2000
+    AddressAlign:    0x1
+    Offset:          0x2000
+    Content:         48656C6C6F2066726F6D204100
+  - Name:            .eh_frame_hdr
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2010
+    AddressAlign:    0x4
+    Content:         011B033B2C0000000400000010F0FFFF4800000030F0FFFF7000000040F0FFFF8800000009F1FFFFA0000000
+  - Name:            .eh_frame
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2040
+    AddressAlign:    0x8
+    Content:         1400000000000000017A5200017810011B0C070890010000240000001C000000C0EFFFFF20000000000E10460E184A0F0B770880003F1A3A2A332422000000001400000044000000B8EFFFFF100000000000000000000000140000005C000000B0EFFFFF1000000000000000000000001C0000007400000061F0FFFF1A00000000450E108602430D06510C070800000000000000
+  - Name:            .init_array
+    Type:            SHT_INIT_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E10
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Offset:          0x2E10
+    Content:         '1011000000000000'
+  - Name:            .fini_array
+    Type:            SHT_FINI_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E18
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         D010000000000000
+  - Name:            .dynamic
+    Type:            SHT_DYNAMIC
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E20
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Entries:
+      - Tag:             DT_NEEDED
+        Value:           0x5F
+      - Tag:             DT_INIT
+        Value:           0x1000
+      - Tag:             DT_FINI
+        Value:           0x1134
+      - Tag:             DT_INIT_ARRAY
+        Value:           0x3E10
+      - Tag:             DT_INIT_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_FINI_ARRAY
+        Value:           0x3E18
+      - Tag:             DT_FINI_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_GNU_HASH
+        Value:           0x2F0
+      - Tag:             DT_STRTAB
+        Value:           0x3C0
+      - Tag:             DT_SYMTAB
+        Value:           0x318
+      - Tag:             DT_STRSZ
+        Value:           0x75
+      - Tag:             DT_SYMENT
+        Value:           0x18
+      - Tag:             DT_PLTGOT
+        Value:           0x4000
+      - Tag:             DT_PLTRELSZ
+        Value:           0x18
+      - Tag:             DT_PLTREL
+        Value:           0x7
+      - Tag:             DT_JMPREL
+        Value:           0x510
+      - Tag:             DT_RELA
+        Value:           0x468
+      - Tag:             DT_RELASZ
+        Value:           0xA8
+      - Tag:             DT_RELAENT
+        Value:           0x18
+      - Tag:             DT_VERNEED
+        Value:           0x448
+      - Tag:             DT_VERNEEDNUM
+        Value:           0x1
+      - Tag:             DT_VERSYM
+        Value:           0x436
+      - Tag:             DT_RELACOUNT
+        Value:           0x3
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+  - Name:            .got
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3FE0
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '0000000000000000000000000000000000000000000000000000000000000000'
+  - Name:            .got.plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4000
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '203E000000000000000000000000000000000000000000003010000000000000'
+  - Name:            .data
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4020
+    AddressAlign:    0x8
+    Content:         '2040000000000000'
+  - Name:            .bss
+    Type:            SHT_NOBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4028
+    AddressAlign:    0x1
+    Size:            0x8
+  - Name:            .comment
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         4743433A20285562756E74752031312E342E302D317562756E7475317E32322E30342E32292031312E342E3000
+Symbols:
+  - Name:            crtstuff.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            deregister_tm_clones
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1060
+  - Name:            register_tm_clones
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1090
+  - Name:            __do_global_dtors_aux
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x10D0
+  - Name:            completed.0
+    Type:            STT_OBJECT
+    Section:         .bss
+    Value:           0x4028
+    Size:            0x1
+  - Name:            __do_global_dtors_aux_fini_array_entry
+    Type:            STT_OBJECT
+    Section:         .fini_array
+    Value:           0x3E18
+  - Name:            frame_dummy
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1110
+  - Name:            __frame_dummy_init_array_entry
+    Type:            STT_OBJECT
+    Section:         .init_array
+    Value:           0x3E10
+  - Name:            libA.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            'crtstuff.c (1)'
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            __FRAME_END__
+    Type:            STT_OBJECT
+    Section:         .eh_frame
+    Value:           0x20D0
+  - Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            _fini
+    Type:            STT_FUNC
+    Section:         .fini
+    Value:           0x1134
+  - Name:            __dso_handle
+    Type:            STT_OBJECT
+    Section:         .data
+    Value:           0x4020
+  - Name:            _DYNAMIC
+    Type:            STT_OBJECT
+    Section:         .dynamic
+    Value:           0x3E20
+  - Name:            __GNU_EH_FRAME_HDR
+    Section:         .eh_frame_hdr
+    Value:           0x2010
+  - Name:            __TMC_END__
+    Type:            STT_OBJECT
+    Section:         .data
+    Value:           0x4028
+  - Name:            _GLOBAL_OFFSET_TABLE_
+    Type:            STT_OBJECT
+    Section:         .got.plt
+    Value:           0x4000
+  - Name:            _init
+    Type:            STT_FUNC
+    Section:         .init
+    Value:           0x1000
+  - Name:            _ITM_deregisterTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            'puts@GLIBC_2.2.5'
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            sayA
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x1119
+    Size:            0x1A
+  - Name:            __gmon_start__
+    Binding:         STB_WEAK
+  - Name:            _ITM_registerTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            '__cxa_finalize@GLIBC_2.2.5'
+    Type:            STT_FUNC
+    Binding:         STB_WEAK
+DynamicSymbols:
+  - Name:            _ITM_deregisterTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            puts
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            __gmon_start__
+    Binding:         STB_WEAK
+  - Name:            _ITM_registerTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            __cxa_finalize
+    Type:            STT_FUNC
+    Binding:         STB_WEAK
+  - Name:            sayA
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x1119
+    Size:            0x1A
+...
diff --git a/llvm/unittests/ExecutionEngine/Orc/Inputs/A/A_macho.yaml b/llvm/unittests/ExecutionEngine/Orc/Inputs/A/A_macho.yaml
new file mode 100644
index 0000000..2e851a90
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/Orc/Inputs/A/A_macho.yaml
@@ -0,0 +1,723 @@
+--- !fat-mach-o
+FatHeader:
+  magic:           0xCAFEBABE
+  nfat_arch:       3
+FatArchs:
+  - cputype:         0x1000007
+    cpusubtype:      0x3
+    offset:          0x1000
+    size:            8376
+    align:           12
+  - cputype:         0x100000C
+    cpusubtype:      0x0
+    offset:          0x4000
+    size:            33376
+    align:           14
+  - cputype:         0x100000C
+    cpusubtype:      0x80000002
+    offset:          0x10000
+    size:            33376
+    align:           14
+Slices:
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x1000007
+      cpusubtype:      0x3
+      filetype:        0x6
+      ncmds:           14
+      sizeofcmds:      960
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         392
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          4096
+        fileoff:         0
+        filesize:        4096
+        maxprot:         5
+        initprot:        5
+        nsects:          4
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0xF80
+            size:            20
+            offset:          0xF80
+            align:           4
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         554889E5488D3D0F000000B000E8020000005DC3
+          - sectname:        __stubs
+            segname:         __TEXT
+            addr:            0xF94
+            size:            6
+            offset:          0xF94
+            align:           1
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0x6
+            reserved3:       0x0
+            content:         FF2566000000
+          - sectname:        __cstring
+            segname:         __TEXT
+            addr:            0xF9A
+            size:            14
+            offset:          0xF9A
+            align:           0
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x2
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         48656C6C6F2066726F6D20410A00
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0xFA8
+            size:            88
+            offset:          0xFA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000800F00004000000040000000940F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000100000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          4096
+        vmsize:          4096
+        fileoff:         4096
+        filesize:        4096
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __got
+            segname:         __DATA_CONST
+            addr:            0x1000
+            size:            8
+            offset:          0x1000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x1
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         '0000000000000080'
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          8192
+        vmsize:          4096
+        fileoff:         8192
+        filesize:        184
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libA.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         8192
+        datasize:        96
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         8288
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          8320
+        nsyms:           2
+        stroff:          8360
+        strsize:         16
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       0
+        iextdefsym:      0
+        nextdefsym:      1
+        iundefsym:       1
+        nundefsym:       1
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  8352
+        nindirectsyms:   2
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            ADFFA141-C3EE-37CD-B1E7-906D69F81BCB
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         8312
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         8320
+        datasize:        0
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayA
+            Flags:           0x0
+            Address:         0xF80
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         3968
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayA
+        - _printf
+      IndirectSymbols: [ 0x1, 0x1 ]
+      FunctionStarts:  [ 0xF80 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x50, 0x0, 0x0, 0x0, 0x1, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x10, 0x6, 0x0, 
+                         0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x2, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5F, 0x70, 0x72, 
+                         0x69, 0x6E, 0x74, 0x66, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0 ]
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x100000C
+      cpusubtype:      0x0
+      filetype:        0x6
+      ncmds:           15
+      sizeofcmds:      976
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         392
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          16384
+        fileoff:         0
+        filesize:        16384
+        maxprot:         5
+        initprot:        5
+        nsects:          4
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0x3F70
+            size:            28
+            offset:          0x3F70
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         FD7BBFA9FD0300910000009000603E9103000094FD7BC1A8C0035FD6
+          - sectname:        __stubs
+            segname:         __TEXT
+            addr:            0x3F8C
+            size:            12
+            offset:          0x3F8C
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0xC
+            reserved3:       0x0
+            content:         100000B0100240F900021FD6
+          - sectname:        __cstring
+            segname:         __TEXT
+            addr:            0x3F98
+            size:            14
+            offset:          0x3F98
+            align:           0
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x2
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         48656C6C6F2066726F6D20410A00
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0x3FA8
+            size:            88
+            offset:          0x3FA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000703F000040000000400000008C3F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000400000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          16384
+        vmsize:          16384
+        fileoff:         16384
+        filesize:        16384
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __got
+            segname:         __DATA_CONST
+            addr:            0x4000
+            size:            8
+            offset:          0x4000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x1
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         '0000000000000080'
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          32768
+        vmsize:          16384
+        fileoff:         32768
+        filesize:        608
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libA.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         32768
+        datasize:        96
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         32864
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          32896
+        nsyms:           2
+        stroff:          32936
+        strsize:         16
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       0
+        iextdefsym:      0
+        nextdefsym:      1
+        iundefsym:       1
+        nundefsym:       1
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  32928
+        nindirectsyms:   2
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            C45227E0-C6C0-3137-969B-36AABF9D5487
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         32888
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         32896
+        datasize:        0
+      - cmd:             LC_CODE_SIGNATURE
+        cmdsize:         16
+        dataoff:         32960
+        datasize:        416
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayA
+            Flags:           0x0
+            Address:         0x3F70
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         16240
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayA
+        - _printf
+      IndirectSymbols: [ 0x1, 0x1 ]
+      FunctionStarts:  [ 0x3F70 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x50, 0x0, 0x0, 0x0, 0x1, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x40, 0x6, 0x0, 
+                         0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x2, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5F, 0x70, 0x72, 
+                         0x69, 0x6E, 0x74, 0x66, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0 ]
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x100000C
+      cpusubtype:      0x80000002
+      filetype:        0x6
+      ncmds:           15
+      sizeofcmds:      976
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         392
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          16384
+        fileoff:         0
+        filesize:        16384
+        maxprot:         5
+        initprot:        5
+        nsects:          4
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0x3F68
+            size:            32
+            offset:          0x3F68
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         7F2303D5FD7BBFA9FD0300910000009000603E9103000094FD7BC1A8FF0F5FD6
+          - sectname:        __auth_stubs
+            segname:         __TEXT
+            addr:            0x3F88
+            size:            16
+            offset:          0x3F88
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0x10
+            reserved3:       0x0
+            content:         110000B031020091300240F9110A1FD7
+          - sectname:        __cstring
+            segname:         __TEXT
+            addr:            0x3F98
+            size:            14
+            offset:          0x3F98
+            align:           0
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x2
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         48656C6C6F2066726F6D20410A00
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0x3FA8
+            size:            88
+            offset:          0x3FA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000683F00004000000040000000883F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000400000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          16384
+        vmsize:          16384
+        fileoff:         16384
+        filesize:        16384
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __auth_got
+            segname:         __DATA_CONST
+            addr:            0x4000
+            size:            8
+            offset:          0x4000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x1
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         00000000000001C0
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          32768
+        vmsize:          16384
+        fileoff:         32768
+        filesize:        608
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libA.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         32768
+        datasize:        96
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         32864
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          32896
+        nsyms:           2
+        stroff:          32936
+        strsize:         16
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       0
+        iextdefsym:      0
+        nextdefsym:      1
+        iundefsym:       1
+        nundefsym:       1
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  32928
+        nindirectsyms:   2
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            C9DC00C2-E721-365C-9C2D-E9FDB7C838BB
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         32888
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         32896
+        datasize:        0
+      - cmd:             LC_CODE_SIGNATURE
+        cmdsize:         16
+        dataoff:         32960
+        datasize:        416
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayA
+            Flags:           0x0
+            Address:         0x3F68
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         16232
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayA
+        - _printf
+      IndirectSymbols: [ 0x1, 0x1 ]
+      FunctionStarts:  [ 0x3F68 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x50, 0x0, 0x0, 0x0, 0x1, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x40, 0xC, 0x0, 
+                         0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x2, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5F, 0x70, 0x72, 
+                         0x69, 0x6E, 0x74, 0x66, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0 ]
+...
diff --git a/llvm/unittests/ExecutionEngine/Orc/Inputs/B/B_linux.yaml b/llvm/unittests/ExecutionEngine/Orc/Inputs/B/B_linux.yaml
new file mode 100644
index 0000000..fe4393e
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/Orc/Inputs/B/B_linux.yaml
@@ -0,0 +1,460 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_DYN
+  Machine:         EM_X86_64
+ProgramHeaders:
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .rela.plt
+    Align:           0x1000
+    Offset:          0x0
+  - Type:            PT_LOAD
+    Flags:           [ PF_X, PF_R ]
+    FirstSec:        .init
+    LastSec:         .fini
+    VAddr:           0x1000
+    Align:           0x1000
+    Offset:          0x1000
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .rodata
+    LastSec:         .eh_frame
+    VAddr:           0x2000
+    Align:           0x1000
+    Offset:          0x2000
+  - Type:            PT_LOAD
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .bss
+    VAddr:           0x3E10
+    Align:           0x1000
+    Offset:          0x2E10
+  - Type:            PT_DYNAMIC
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .dynamic
+    LastSec:         .dynamic
+    VAddr:           0x3E20
+    Align:           0x8
+    Offset:          0x2E20
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .note.gnu.property
+    VAddr:           0x2A8
+    Align:           0x8
+    Offset:          0x2A8
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.build-id
+    LastSec:         .note.gnu.build-id
+    VAddr:           0x2C8
+    Align:           0x4
+    Offset:          0x2C8
+  - Type:            PT_GNU_PROPERTY
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .note.gnu.property
+    VAddr:           0x2A8
+    Align:           0x8
+    Offset:          0x2A8
+  - Type:            PT_GNU_EH_FRAME
+    Flags:           [ PF_R ]
+    FirstSec:        .eh_frame_hdr
+    LastSec:         .eh_frame_hdr
+    VAddr:           0x2010
+    Align:           0x4
+    Offset:          0x2010
+  - Type:            PT_GNU_STACK
+    Flags:           [ PF_W, PF_R ]
+    Align:           0x10
+    Offset:          0x0
+  - Type:            PT_GNU_RELRO
+    Flags:           [ PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .got
+    VAddr:           0x3E10
+    Offset:          0x2E10
+Sections:
+  - Name:            .note.gnu.property
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2A8
+    AddressAlign:    0x8
+    Notes:
+      - Name:            GNU
+        Desc:            020000C0040000000300000000000000
+        Type:            NT_GNU_PROPERTY_TYPE_0
+  - Name:            .note.gnu.build-id
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2C8
+    AddressAlign:    0x4
+    Notes:
+      - Name:            GNU
+        Desc:            6337F7C1BF21A1DE17630C55602EB4CAC50435BB
+        Type:            NT_PRPSINFO
+  - Name:            .gnu.hash
+    Type:            SHT_GNU_HASH
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2F0
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Header:
+      SymNdx:          0x6
+      Shift2:          0x6
+    BloomFilter:     [ 0x400000100000 ]
+    HashBuckets:     [ 0x6, 0x0 ]
+    HashValues:      [ 0x7C9DCB95 ]
+  - Name:            .dynsym
+    Type:            SHT_DYNSYM
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x318
+    Link:            .dynstr
+    AddressAlign:    0x8
+  - Name:            .dynstr
+    Type:            SHT_STRTAB
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x3C0
+    AddressAlign:    0x1
+  - Name:            .gnu.version
+    Type:            SHT_GNU_versym
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x436
+    Link:            .dynsym
+    AddressAlign:    0x2
+    Entries:         [ 0, 1, 2, 1, 1, 2, 1 ]
+  - Name:            .gnu.version_r
+    Type:            SHT_GNU_verneed
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x448
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Dependencies:
+      - Version:         1
+        File:            libc.so.6
+        Entries:
+          - Name:            GLIBC_2.2.5
+            Hash:            157882997
+            Flags:           0
+            Other:           2
+  - Name:            .rela.dyn
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x468
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Relocations:
+      - Offset:          0x3E10
+        Type:            R_X86_64_RELATIVE
+        Addend:          4368
+      - Offset:          0x3E18
+        Type:            R_X86_64_RELATIVE
+        Addend:          4304
+      - Offset:          0x4020
+        Type:            R_X86_64_RELATIVE
+        Addend:          16416
+      - Offset:          0x3FE0
+        Symbol:          _ITM_deregisterTMCloneTable
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FE8
+        Symbol:          __gmon_start__
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FF0
+        Symbol:          _ITM_registerTMCloneTable
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FF8
+        Symbol:          __cxa_finalize
+        Type:            R_X86_64_GLOB_DAT
+  - Name:            .rela.plt
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC, SHF_INFO_LINK ]
+    Address:         0x510
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Info:            .got.plt
+    Relocations:
+      - Offset:          0x4018
+        Symbol:          puts
+        Type:            R_X86_64_JUMP_SLOT
+  - Name:            .init
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1000
+    AddressAlign:    0x4
+    Offset:          0x1000
+    Content:         F30F1EFA4883EC08488B05D92F00004885C07402FFD04883C408C3
+  - Name:            .plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1020
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         FF35E22F0000F2FF25E32F00000F1F00F30F1EFA6800000000F2E9E1FFFFFF90
+  - Name:            .plt.got
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1040
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         F30F1EFAF2FF25AD2F00000F1F440000
+  - Name:            .plt.sec
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1050
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         F30F1EFAF2FF25BD2F00000F1F440000
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1060
+    AddressAlign:    0x10
+    Content:         488D3DC12F0000488D05BA2F00004839F87415488B05662F00004885C07409FFE00F1F8000000000C30F1F8000000000488D3D912F0000488D358A2F00004829FE4889F048C1EE3F48C1F8034801C648D1FE7414488B05352F00004885C07408FFE0660F1F440000C30F1F8000000000F30F1EFA803D4D2F000000752B5548833D122F0000004889E5740C488B3D2E2F0000E849FFFFFFE864FFFFFFC605252F0000015DC30F1F00C30F1F8000000000F30F1EFAE977FFFFFFF30F1EFA554889E5488D05D80E00004889C7E820FFFFFF905DC3
+  - Name:            .fini
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1134
+    AddressAlign:    0x4
+    Content:         F30F1EFA4883EC084883C408C3
+  - Name:            .rodata
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2000
+    AddressAlign:    0x1
+    Offset:          0x2000
+    Content:         48656C6C6F2066726F6D204200
+  - Name:            .eh_frame_hdr
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2010
+    AddressAlign:    0x4
+    Content:         011B033B2C0000000400000010F0FFFF4800000030F0FFFF7000000040F0FFFF8800000009F1FFFFA0000000
+  - Name:            .eh_frame
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2040
+    AddressAlign:    0x8
+    Content:         1400000000000000017A5200017810011B0C070890010000240000001C000000C0EFFFFF20000000000E10460E184A0F0B770880003F1A3A2A332422000000001400000044000000B8EFFFFF100000000000000000000000140000005C000000B0EFFFFF1000000000000000000000001C0000007400000061F0FFFF1A00000000450E108602430D06510C070800000000000000
+  - Name:            .init_array
+    Type:            SHT_INIT_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E10
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Offset:          0x2E10
+    Content:         '1011000000000000'
+  - Name:            .fini_array
+    Type:            SHT_FINI_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E18
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         D010000000000000
+  - Name:            .dynamic
+    Type:            SHT_DYNAMIC
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E20
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Entries:
+      - Tag:             DT_NEEDED
+        Value:           0x5F
+      - Tag:             DT_INIT
+        Value:           0x1000
+      - Tag:             DT_FINI
+        Value:           0x1134
+      - Tag:             DT_INIT_ARRAY
+        Value:           0x3E10
+      - Tag:             DT_INIT_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_FINI_ARRAY
+        Value:           0x3E18
+      - Tag:             DT_FINI_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_GNU_HASH
+        Value:           0x2F0
+      - Tag:             DT_STRTAB
+        Value:           0x3C0
+      - Tag:             DT_SYMTAB
+        Value:           0x318
+      - Tag:             DT_STRSZ
+        Value:           0x75
+      - Tag:             DT_SYMENT
+        Value:           0x18
+      - Tag:             DT_PLTGOT
+        Value:           0x4000
+      - Tag:             DT_PLTRELSZ
+        Value:           0x18
+      - Tag:             DT_PLTREL
+        Value:           0x7
+      - Tag:             DT_JMPREL
+        Value:           0x510
+      - Tag:             DT_RELA
+        Value:           0x468
+      - Tag:             DT_RELASZ
+        Value:           0xA8
+      - Tag:             DT_RELAENT
+        Value:           0x18
+      - Tag:             DT_VERNEED
+        Value:           0x448
+      - Tag:             DT_VERNEEDNUM
+        Value:           0x1
+      - Tag:             DT_VERSYM
+        Value:           0x436
+      - Tag:             DT_RELACOUNT
+        Value:           0x3
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+  - Name:            .got
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3FE0
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '0000000000000000000000000000000000000000000000000000000000000000'
+  - Name:            .got.plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4000
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '203E000000000000000000000000000000000000000000003010000000000000'
+  - Name:            .data
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4020
+    AddressAlign:    0x8
+    Content:         '2040000000000000'
+  - Name:            .bss
+    Type:            SHT_NOBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4028
+    AddressAlign:    0x1
+    Size:            0x8
+  - Name:            .comment
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         4743433A20285562756E74752031312E342E302D317562756E7475317E32322E30342E32292031312E342E3000
+Symbols:
+  - Name:            crtstuff.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            deregister_tm_clones
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1060
+  - Name:            register_tm_clones
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1090
+  - Name:            __do_global_dtors_aux
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x10D0
+  - Name:            completed.0
+    Type:            STT_OBJECT
+    Section:         .bss
+    Value:           0x4028
+    Size:            0x1
+  - Name:            __do_global_dtors_aux_fini_array_entry
+    Type:            STT_OBJECT
+    Section:         .fini_array
+    Value:           0x3E18
+  - Name:            frame_dummy
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1110
+  - Name:            __frame_dummy_init_array_entry
+    Type:            STT_OBJECT
+    Section:         .init_array
+    Value:           0x3E10
+  - Name:            libB.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            'crtstuff.c (1)'
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            __FRAME_END__
+    Type:            STT_OBJECT
+    Section:         .eh_frame
+    Value:           0x20D0
+  - Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            _fini
+    Type:            STT_FUNC
+    Section:         .fini
+    Value:           0x1134
+  - Name:            __dso_handle
+    Type:            STT_OBJECT
+    Section:         .data
+    Value:           0x4020
+  - Name:            _DYNAMIC
+    Type:            STT_OBJECT
+    Section:         .dynamic
+    Value:           0x3E20
+  - Name:            __GNU_EH_FRAME_HDR
+    Section:         .eh_frame_hdr
+    Value:           0x2010
+  - Name:            __TMC_END__
+    Type:            STT_OBJECT
+    Section:         .data
+    Value:           0x4028
+  - Name:            _GLOBAL_OFFSET_TABLE_
+    Type:            STT_OBJECT
+    Section:         .got.plt
+    Value:           0x4000
+  - Name:            _init
+    Type:            STT_FUNC
+    Section:         .init
+    Value:           0x1000
+  - Name:            _ITM_deregisterTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            'puts@GLIBC_2.2.5'
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            __gmon_start__
+    Binding:         STB_WEAK
+  - Name:            sayB
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x1119
+    Size:            0x1A
+  - Name:            _ITM_registerTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            '__cxa_finalize@GLIBC_2.2.5'
+    Type:            STT_FUNC
+    Binding:         STB_WEAK
+DynamicSymbols:
+  - Name:            _ITM_deregisterTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            puts
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            __gmon_start__
+    Binding:         STB_WEAK
+  - Name:            _ITM_registerTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            __cxa_finalize
+    Type:            STT_FUNC
+    Binding:         STB_WEAK
+  - Name:            sayB
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x1119
+    Size:            0x1A
+...
diff --git a/llvm/unittests/ExecutionEngine/Orc/Inputs/B/B_macho.yaml b/llvm/unittests/ExecutionEngine/Orc/Inputs/B/B_macho.yaml
new file mode 100644
index 0000000..3d57c4f
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/Orc/Inputs/B/B_macho.yaml
@@ -0,0 +1,723 @@
+--- !fat-mach-o
+FatHeader:
+  magic:           0xCAFEBABE
+  nfat_arch:       3
+FatArchs:
+  - cputype:         0x1000007
+    cpusubtype:      0x3
+    offset:          0x1000
+    size:            8376
+    align:           12
+  - cputype:         0x100000C
+    cpusubtype:      0x0
+    offset:          0x4000
+    size:            33376
+    align:           14
+  - cputype:         0x100000C
+    cpusubtype:      0x80000002
+    offset:          0x10000
+    size:            33376
+    align:           14
+Slices:
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x1000007
+      cpusubtype:      0x3
+      filetype:        0x6
+      ncmds:           14
+      sizeofcmds:      960
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         392
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          4096
+        fileoff:         0
+        filesize:        4096
+        maxprot:         5
+        initprot:        5
+        nsects:          4
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0xF80
+            size:            20
+            offset:          0xF80
+            align:           4
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         554889E5488D3D0F000000B000E8020000005DC3
+          - sectname:        __stubs
+            segname:         __TEXT
+            addr:            0xF94
+            size:            6
+            offset:          0xF94
+            align:           1
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0x6
+            reserved3:       0x0
+            content:         FF2566000000
+          - sectname:        __cstring
+            segname:         __TEXT
+            addr:            0xF9A
+            size:            14
+            offset:          0xF9A
+            align:           0
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x2
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         48656C6C6F2066726F6D20420A00
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0xFA8
+            size:            88
+            offset:          0xFA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000800F00004000000040000000940F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000100000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          4096
+        vmsize:          4096
+        fileoff:         4096
+        filesize:        4096
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __got
+            segname:         __DATA_CONST
+            addr:            0x1000
+            size:            8
+            offset:          0x1000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x1
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         '0000000000000080'
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          8192
+        vmsize:          4096
+        fileoff:         8192
+        filesize:        184
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libB.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         8192
+        datasize:        96
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         8288
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          8320
+        nsyms:           2
+        stroff:          8360
+        strsize:         16
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       0
+        iextdefsym:      0
+        nextdefsym:      1
+        iundefsym:       1
+        nundefsym:       1
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  8352
+        nindirectsyms:   2
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            88B60B3C-13D3-3D7E-AEED-5F3E991FDF08
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         8312
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         8320
+        datasize:        0
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayB
+            Flags:           0x0
+            Address:         0xF80
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         3968
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayB
+        - _printf
+      IndirectSymbols: [ 0x1, 0x1 ]
+      FunctionStarts:  [ 0xF80 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x50, 0x0, 0x0, 0x0, 0x1, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x10, 0x6, 0x0, 
+                         0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x2, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5F, 0x70, 0x72, 
+                         0x69, 0x6E, 0x74, 0x66, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0 ]
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x100000C
+      cpusubtype:      0x0
+      filetype:        0x6
+      ncmds:           15
+      sizeofcmds:      976
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         392
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          16384
+        fileoff:         0
+        filesize:        16384
+        maxprot:         5
+        initprot:        5
+        nsects:          4
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0x3F70
+            size:            28
+            offset:          0x3F70
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         FD7BBFA9FD0300910000009000603E9103000094FD7BC1A8C0035FD6
+          - sectname:        __stubs
+            segname:         __TEXT
+            addr:            0x3F8C
+            size:            12
+            offset:          0x3F8C
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0xC
+            reserved3:       0x0
+            content:         100000B0100240F900021FD6
+          - sectname:        __cstring
+            segname:         __TEXT
+            addr:            0x3F98
+            size:            14
+            offset:          0x3F98
+            align:           0
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x2
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         48656C6C6F2066726F6D20420A00
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0x3FA8
+            size:            88
+            offset:          0x3FA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000703F000040000000400000008C3F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000400000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          16384
+        vmsize:          16384
+        fileoff:         16384
+        filesize:        16384
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __got
+            segname:         __DATA_CONST
+            addr:            0x4000
+            size:            8
+            offset:          0x4000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x1
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         '0000000000000080'
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          32768
+        vmsize:          16384
+        fileoff:         32768
+        filesize:        608
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libB.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         32768
+        datasize:        96
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         32864
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          32896
+        nsyms:           2
+        stroff:          32936
+        strsize:         16
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       0
+        iextdefsym:      0
+        nextdefsym:      1
+        iundefsym:       1
+        nundefsym:       1
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  32928
+        nindirectsyms:   2
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            90C3787A-22E1-35AE-9284-97A4842F88AF
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         32888
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         32896
+        datasize:        0
+      - cmd:             LC_CODE_SIGNATURE
+        cmdsize:         16
+        dataoff:         32960
+        datasize:        416
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayB
+            Flags:           0x0
+            Address:         0x3F70
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         16240
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayB
+        - _printf
+      IndirectSymbols: [ 0x1, 0x1 ]
+      FunctionStarts:  [ 0x3F70 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x50, 0x0, 0x0, 0x0, 0x1, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x40, 0x6, 0x0, 
+                         0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x2, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5F, 0x70, 0x72, 
+                         0x69, 0x6E, 0x74, 0x66, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0 ]
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x100000C
+      cpusubtype:      0x80000002
+      filetype:        0x6
+      ncmds:           15
+      sizeofcmds:      976
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         392
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          16384
+        fileoff:         0
+        filesize:        16384
+        maxprot:         5
+        initprot:        5
+        nsects:          4
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0x3F68
+            size:            32
+            offset:          0x3F68
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         7F2303D5FD7BBFA9FD0300910000009000603E9103000094FD7BC1A8FF0F5FD6
+          - sectname:        __auth_stubs
+            segname:         __TEXT
+            addr:            0x3F88
+            size:            16
+            offset:          0x3F88
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0x10
+            reserved3:       0x0
+            content:         110000B031020091300240F9110A1FD7
+          - sectname:        __cstring
+            segname:         __TEXT
+            addr:            0x3F98
+            size:            14
+            offset:          0x3F98
+            align:           0
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x2
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         48656C6C6F2066726F6D20420A00
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0x3FA8
+            size:            88
+            offset:          0x3FA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000683F00004000000040000000883F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000400000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          16384
+        vmsize:          16384
+        fileoff:         16384
+        filesize:        16384
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __auth_got
+            segname:         __DATA_CONST
+            addr:            0x4000
+            size:            8
+            offset:          0x4000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x1
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         00000000000001C0
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          32768
+        vmsize:          16384
+        fileoff:         32768
+        filesize:        608
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libB.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         32768
+        datasize:        96
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         32864
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          32896
+        nsyms:           2
+        stroff:          32936
+        strsize:         16
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       0
+        iextdefsym:      0
+        nextdefsym:      1
+        iundefsym:       1
+        nundefsym:       1
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  32928
+        nindirectsyms:   2
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            76B41B3A-00EC-388B-A432-478A96772CC4
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         32888
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         32896
+        datasize:        0
+      - cmd:             LC_CODE_SIGNATURE
+        cmdsize:         16
+        dataoff:         32960
+        datasize:        416
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayB
+            Flags:           0x0
+            Address:         0x3F68
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         16232
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayB
+        - _printf
+      IndirectSymbols: [ 0x1, 0x1 ]
+      FunctionStarts:  [ 0x3F68 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x50, 0x0, 0x0, 0x0, 0x1, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x40, 0xC, 0x0, 
+                         0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x2, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5F, 0x70, 0x72, 
+                         0x69, 0x6E, 0x74, 0x66, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0 ]
+...
diff --git a/llvm/unittests/ExecutionEngine/Orc/Inputs/C/C_linux.yaml b/llvm/unittests/ExecutionEngine/Orc/Inputs/C/C_linux.yaml
new file mode 100644
index 0000000..8b75b1e
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/Orc/Inputs/C/C_linux.yaml
@@ -0,0 +1,461 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_DYN
+  Machine:         EM_X86_64
+ProgramHeaders:
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .rela.plt
+    Align:           0x1000
+    Offset:          0x0
+  - Type:            PT_LOAD
+    Flags:           [ PF_X, PF_R ]
+    FirstSec:        .init
+    LastSec:         .fini
+    VAddr:           0x1000
+    Align:           0x1000
+    Offset:          0x1000
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .eh_frame_hdr
+    LastSec:         .eh_frame
+    VAddr:           0x2000
+    Align:           0x1000
+    Offset:          0x2000
+  - Type:            PT_LOAD
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .bss
+    VAddr:           0x3E00
+    Align:           0x1000
+    Offset:          0x2E00
+  - Type:            PT_DYNAMIC
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .dynamic
+    LastSec:         .dynamic
+    VAddr:           0x3E10
+    Align:           0x8
+    Offset:          0x2E10
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .note.gnu.property
+    VAddr:           0x2A8
+    Align:           0x8
+    Offset:          0x2A8
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.build-id
+    LastSec:         .note.gnu.build-id
+    VAddr:           0x2C8
+    Align:           0x4
+    Offset:          0x2C8
+  - Type:            PT_GNU_PROPERTY
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .note.gnu.property
+    VAddr:           0x2A8
+    Align:           0x8
+    Offset:          0x2A8
+  - Type:            PT_GNU_EH_FRAME
+    Flags:           [ PF_R ]
+    FirstSec:        .eh_frame_hdr
+    LastSec:         .eh_frame_hdr
+    VAddr:           0x2000
+    Align:           0x4
+    Offset:          0x2000
+  - Type:            PT_GNU_STACK
+    Flags:           [ PF_W, PF_R ]
+    Align:           0x10
+    Offset:          0x0
+  - Type:            PT_GNU_RELRO
+    Flags:           [ PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .got
+    VAddr:           0x3E00
+    Offset:          0x2E00
+Sections:
+  - Name:            .note.gnu.property
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2A8
+    AddressAlign:    0x8
+    Notes:
+      - Name:            GNU
+        Desc:            020000C0040000000300000000000000
+        Type:            NT_GNU_PROPERTY_TYPE_0
+  - Name:            .note.gnu.build-id
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2C8
+    AddressAlign:    0x4
+    Notes:
+      - Name:            GNU
+        Desc:            C5C3C9594A5D3556DC54D70850C6DBC316710857
+        Type:            NT_PRPSINFO
+  - Name:            .gnu.hash
+    Type:            SHT_GNU_HASH
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2F0
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Header:
+      SymNdx:          0x9
+      Shift2:          0x6
+    BloomFilter:     [ 0x400000200000 ]
+    HashBuckets:     [ 0x0, 0x9 ]
+    HashValues:      [ 0x7C9DCB95 ]
+  - Name:            .dynsym
+    Type:            SHT_DYNSYM
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x318
+    Link:            .dynstr
+    AddressAlign:    0x8
+  - Name:            .dynstr
+    Type:            SHT_STRTAB
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x408
+    AddressAlign:    0x1
+    Content:         "6C6962412E736F006C6962422E736F006C6962442E736F006C69625A2E736F00244F524947494E2F2E2E2F413A244F524947494E2F2E2E2F423A244F524947494E2F2E2E2F443A244F524947494E2F2E2E2F5A"
+  - Name:            .rela.dyn
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x4D0
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Relocations:
+      - Offset:          0x3E00
+        Type:            R_X86_64_RELATIVE
+        Addend:          4464
+      - Offset:          0x3E08
+        Type:            R_X86_64_RELATIVE
+        Addend:          4400
+      - Offset:          0x4038
+        Type:            R_X86_64_RELATIVE
+        Addend:          16440
+      - Offset:          0x3FE0
+        Symbol:          __cxa_finalize
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FE8
+        Symbol:          _ITM_registerTMCloneTable
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FF0
+        Symbol:          _ITM_deregisterTMCloneTable
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FF8
+        Symbol:          __gmon_start__
+        Type:            R_X86_64_GLOB_DAT
+  - Name:            .rela.plt
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC, SHF_INFO_LINK ]
+    Address:         0x578
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Info:            .got.plt
+    Relocations:
+      - Offset:          0x4018
+        Symbol:          sayD
+        Type:            R_X86_64_JUMP_SLOT
+      - Offset:          0x4020
+        Symbol:          sayA
+        Type:            R_X86_64_JUMP_SLOT
+      - Offset:          0x4028
+        Symbol:          sayB
+        Type:            R_X86_64_JUMP_SLOT
+      - Offset:          0x4030
+        Symbol:          sayZ
+        Type:            R_X86_64_JUMP_SLOT
+  - Name:            .init
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1000
+    AddressAlign:    0x4
+    Offset:          0x1000
+    Content:         F30F1EFA4883EC08488B05E92F00004885C07402FFD04883C408C3
+  - Name:            .plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1020
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         FF35E22F0000F2FF25E32F00000F1F00F30F1EFA6800000000F2E9E1FFFFFF90F30F1EFA6801000000F2E9D1FFFFFF90F30F1EFA6802000000F2E9C1FFFFFF90F30F1EFA6803000000F2E9B1FFFFFF90
+  - Name:            .plt.got
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1070
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         F30F1EFAF2FF25652F00000F1F440000
+  - Name:            .plt.sec
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1080
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         F30F1EFAF2FF258D2F00000F1F440000F30F1EFAF2FF25852F00000F1F440000F30F1EFAF2FF257D2F00000F1F440000F30F1EFAF2FF25752F00000F1F440000
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x10C0
+    AddressAlign:    0x10
+    Content:         488D3D792F0000488D05722F00004839F87415488B05162F00004885C07409FFE00F1F8000000000C30F1F8000000000488D3D492F0000488D35422F00004829FE4889F048C1EE3F48C1F8034801C648D1FE7414488B05CD2E00004885C07408FFE0660F1F440000C30F1F8000000000F30F1EFA803D052F000000752B5548833D9A2E0000004889E5740C488B3DE62E0000E819FFFFFFE864FFFFFFC605DD2E0000015DC30F1F00C30F1F8000000000F30F1EFAE977FFFFFFF30F1EFA554889E5B800000000E805FFFFFFB800000000E80BFFFFFFB800000000E8E1FEFFFFB800000000E807FFFFFF905DC3
+  - Name:            .fini
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x11AC
+    AddressAlign:    0x4
+    Content:         F30F1EFA4883EC084883C408C3
+  - Name:            .eh_frame_hdr
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2000
+    AddressAlign:    0x4
+    Offset:          0x2000
+    Content:         011B033B2C0000000400000020F0FFFF4800000070F0FFFF7000000080F0FFFF8800000079F1FFFFA0000000
+  - Name:            .eh_frame
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2030
+    AddressAlign:    0x8
+    Content:         1400000000000000017A5200017810011B0C070890010000240000001C000000D0EFFFFF50000000000E10460E184A0F0B770880003F1A3A2A332422000000001400000044000000F8EFFFFF100000000000000000000000140000005C000000F0EFFFFF4000000000000000000000001C00000074000000D1F0FFFF3300000000450E108602430D066A0C070800000000000000
+  - Name:            .init_array
+    Type:            SHT_INIT_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E00
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Offset:          0x2E00
+    Content:         '7011000000000000'
+  - Name:            .fini_array
+    Type:            SHT_FINI_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E08
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '3011000000000000'
+  - Name:            .dynamic
+    Type:            SHT_DYNAMIC
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E10
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Entries:
+      - Tag:             DT_NEEDED
+        Value:           0x0
+      - Tag:             DT_NEEDED
+        Value:           0x8
+      - Tag:             DT_NEEDED
+        Value:           0x10
+      - Tag:             DT_NEEDED
+        Value:           0x18
+      - Tag:             DT_RUNPATH
+        Value:           0x20
+      - Tag:             DT_INIT
+        Value:           0x1000
+      - Tag:             DT_FINI
+        Value:           0x11AC
+      - Tag:             DT_INIT_ARRAY
+        Value:           0x3E00
+      - Tag:             DT_INIT_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_FINI_ARRAY
+        Value:           0x3E08
+      - Tag:             DT_FINI_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_GNU_HASH
+        Value:           0x2F0
+      - Tag:             DT_STRTAB
+        Value:           0x408
+      - Tag:             DT_SYMTAB
+        Value:           0x318
+      - Tag:             DT_STRSZ
+        Value:           0xC2
+      - Tag:             DT_SYMENT
+        Value:           0x18
+      - Tag:             DT_PLTGOT
+        Value:           0x4000
+      - Tag:             DT_PLTRELSZ
+        Value:           0x60
+      - Tag:             DT_PLTREL
+        Value:           0x7
+      - Tag:             DT_JMPREL
+        Value:           0x578
+      - Tag:             DT_RELA
+        Value:           0x4D0
+      - Tag:             DT_RELASZ
+        Value:           0xA8
+      - Tag:             DT_RELAENT
+        Value:           0x18
+      - Tag:             DT_RELACOUNT
+        Value:           0x3
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+  - Name:            .got
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3FE0
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '0000000000000000000000000000000000000000000000000000000000000000'
+  - Name:            .got.plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4000
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '103E000000000000000000000000000000000000000000003010000000000000401000000000000050100000000000006010000000000000'
+  - Name:            .data
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4038
+    AddressAlign:    0x8
+    Content:         '3840000000000000'
+  - Name:            .bss
+    Type:            SHT_NOBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4040
+    AddressAlign:    0x1
+    Size:            0x8
+  - Name:            .comment
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         4743433A20285562756E74752031312E342E302D317562756E7475317E32322E30342E32292031312E342E3000
+Symbols:
+  - Name:            crtstuff.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            deregister_tm_clones
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x10C0
+  - Name:            register_tm_clones
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x10F0
+  - Name:            __do_global_dtors_aux
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1130
+  - Name:            completed.0
+    Type:            STT_OBJECT
+    Section:         .bss
+    Value:           0x4040
+    Size:            0x1
+  - Name:            __do_global_dtors_aux_fini_array_entry
+    Type:            STT_OBJECT
+    Section:         .fini_array
+    Value:           0x3E08
+  - Name:            frame_dummy
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1170
+  - Name:            __frame_dummy_init_array_entry
+    Type:            STT_OBJECT
+    Section:         .init_array
+    Value:           0x3E00
+  - Name:            libC.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            'crtstuff.c (1)'
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            __FRAME_END__
+    Type:            STT_OBJECT
+    Section:         .eh_frame
+    Value:           0x20C0
+  - Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            _DYNAMIC
+    Type:            STT_OBJECT
+    Section:         .dynamic
+    Value:           0x3E10
+  - Name:            __TMC_END__
+    Type:            STT_OBJECT
+    Section:         .data
+    Value:           0x4040
+  - Name:            __dso_handle
+    Type:            STT_OBJECT
+    Section:         .data
+    Value:           0x4038
+  - Name:            _init
+    Type:            STT_FUNC
+    Section:         .init
+    Value:           0x1000
+  - Name:            __GNU_EH_FRAME_HDR
+    Section:         .eh_frame_hdr
+    Value:           0x2000
+  - Name:            _fini
+    Type:            STT_FUNC
+    Section:         .fini
+    Value:           0x11AC
+  - Name:            _GLOBAL_OFFSET_TABLE_
+    Type:            STT_OBJECT
+    Section:         .got.plt
+    Value:           0x4000
+  - Name:            sayD
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            __cxa_finalize
+    Binding:         STB_WEAK
+  - Name:            sayC
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x1179
+    Size:            0x33
+  - Name:            _ITM_registerTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            _ITM_deregisterTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            sayA
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            sayB
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            sayZ
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            __gmon_start__
+    Binding:         STB_WEAK
+DynamicSymbols:
+  - Name:            sayD
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            __cxa_finalize
+    Binding:         STB_WEAK
+  - Name:            _ITM_registerTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            _ITM_deregisterTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            sayA
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            sayB
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            sayZ
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            __gmon_start__
+    Binding:         STB_WEAK
+  - Name:            sayC
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x1179
+    Size:            0x33
+...
diff --git a/llvm/unittests/ExecutionEngine/Orc/Inputs/C/C_macho.yaml b/llvm/unittests/ExecutionEngine/Orc/Inputs/C/C_macho.yaml
new file mode 100644
index 0000000..f6ad081
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/Orc/Inputs/C/C_macho.yaml
@@ -0,0 +1,915 @@
+--- !fat-mach-o
+FatHeader:
+  magic:           0xCAFEBABE
+  nfat_arch:       3
+FatArchs:
+  - cputype:         0x1000007
+    cpusubtype:      0x3
+    offset:          0x1000
+    size:            8488
+    align:           12
+  - cputype:         0x100000C
+    cpusubtype:      0x0
+    offset:          0x4000
+    size:            33488
+    align:           14
+  - cputype:         0x100000C
+    cpusubtype:      0x80000002
+    offset:          0x10000
+    size:            33488
+    align:           14
+Slices:
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x1000007
+      cpusubtype:      0x3
+      filetype:        0x6
+      ncmds:           22
+      sizeofcmds:      1200
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         312
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          4096
+        fileoff:         0
+        filesize:        4096
+        maxprot:         5
+        initprot:        5
+        nsects:          3
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0xF60
+            size:            34
+            offset:          0xF60
+            align:           4
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         554889E5B000E817000000B000E816000000B000E815000000B000E8140000005DC3
+          - sectname:        __stubs
+            segname:         __TEXT
+            addr:            0xF82
+            size:            24
+            offset:          0xF82
+            align:           1
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0x6
+            reserved3:       0x0
+            content:         FF2578000000FF257A000000FF257C000000FF257E000000
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0xF9C
+            size:            88
+            offset:          0xF9C
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000600F00004000000040000000820F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000100000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          4096
+        vmsize:          4096
+        fileoff:         4096
+        filesize:        4096
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __got
+            segname:         __DATA_CONST
+            addr:            0x1000
+            size:            32
+            offset:          0x1000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x4
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         '0000000000001080010000000000108002000000000010800300000000000080'
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          8192
+        vmsize:          4096
+        fileoff:         8192
+        filesize:        296
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libC.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         8192
+        datasize:        120
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         8312
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          8344
+        nsyms:           5
+        stroff:          8456
+        strsize:         32
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       0
+        iextdefsym:      0
+        nextdefsym:      1
+        iundefsym:       1
+        nundefsym:       4
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  8424
+        nindirectsyms:   8
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            2A1F4EC3-CD6C-3293-9D2B-5D8E42FE51EF
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libA.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libB.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libD.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libZ.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../A'
+        ZeroPadBytes:    3
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../B'
+        ZeroPadBytes:    3
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../D'
+        ZeroPadBytes:    3
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../Z'
+        ZeroPadBytes:    3
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         8336
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         8344
+        datasize:        0
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayC
+            Flags:           0x0
+            Address:         0xF60
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         3936
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+        - n_strx:          14
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          512
+          n_value:         0
+        - n_strx:          20
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          768
+          n_value:         0
+        - n_strx:          26
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          1024
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayC
+        - _sayA
+        - _sayB
+        - _sayD
+        - _sayZ
+      IndirectSymbols: [ 0x1, 0x2, 0x3, 0x4, 0x1, 0x2, 0x3, 0x4 ]
+      FunctionStarts:  [ 0xF60 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x58, 0x0, 0x0, 0x0, 0x4, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x10, 0x6, 0x0, 
+                         0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x2, 0x0, 
+                         0x0, 0x2, 0xE, 0x0, 0x0, 0x3, 0x1A, 0x0, 0x0, 
+                         0x4, 0x26, 0x0, 0x0, 0x0, 0x5F, 0x73, 0x61, 0x79, 
+                         0x41, 0x0, 0x5F, 0x73, 0x61, 0x79, 0x42, 0x0, 
+                         0x5F, 0x73, 0x61, 0x79, 0x44, 0x0, 0x5F, 0x73, 
+                         0x61, 0x79, 0x5A, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0 ]
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x100000C
+      cpusubtype:      0x0
+      filetype:        0x6
+      ncmds:           23
+      sizeofcmds:      1216
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         312
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          16384
+        fileoff:         0
+        filesize:        16384
+        maxprot:         5
+        initprot:        5
+        nsects:          3
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0x3F58
+            size:            32
+            offset:          0x3F58
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         FD7BBFA9FD03009106000094080000940A0000940C000094FD7BC1A8C0035FD6
+          - sectname:        __stubs
+            segname:         __TEXT
+            addr:            0x3F78
+            size:            48
+            offset:          0x3F78
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0xC
+            reserved3:       0x0
+            content:         100000B0100240F900021FD6100000B0100640F900021FD6100000B0100A40F900021FD6100000B0100E40F900021FD6
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0x3FA8
+            size:            88
+            offset:          0x3FA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000583F00004000000040000000783F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000400000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          16384
+        vmsize:          16384
+        fileoff:         16384
+        filesize:        16384
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __got
+            segname:         __DATA_CONST
+            addr:            0x4000
+            size:            32
+            offset:          0x4000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x4
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         '0000000000001080010000000000108002000000000010800300000000000080'
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          32768
+        vmsize:          16384
+        fileoff:         32768
+        filesize:        720
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libC.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         32768
+        datasize:        120
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         32888
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          32920
+        nsyms:           5
+        stroff:          33032
+        strsize:         32
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       0
+        iextdefsym:      0
+        nextdefsym:      1
+        iundefsym:       1
+        nundefsym:       4
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  33000
+        nindirectsyms:   8
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            6DE75070-D632-398D-8BB5-06C8C8B29147
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libA.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libB.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libD.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libZ.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../A'
+        ZeroPadBytes:    3
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../B'
+        ZeroPadBytes:    3
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../D'
+        ZeroPadBytes:    3
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../Z'
+        ZeroPadBytes:    3
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         32912
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         32920
+        datasize:        0
+      - cmd:             LC_CODE_SIGNATURE
+        cmdsize:         16
+        dataoff:         33072
+        datasize:        416
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayC
+            Flags:           0x0
+            Address:         0x3F58
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         16216
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+        - n_strx:          14
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          512
+          n_value:         0
+        - n_strx:          20
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          768
+          n_value:         0
+        - n_strx:          26
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          1024
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayC
+        - _sayA
+        - _sayB
+        - _sayD
+        - _sayZ
+      IndirectSymbols: [ 0x1, 0x2, 0x3, 0x4, 0x1, 0x2, 0x3, 0x4 ]
+      FunctionStarts:  [ 0x3F58 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x58, 0x0, 0x0, 0x0, 0x4, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x40, 0x6, 0x0, 
+                         0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x2, 0x0, 
+                         0x0, 0x2, 0xE, 0x0, 0x0, 0x3, 0x1A, 0x0, 0x0, 
+                         0x4, 0x26, 0x0, 0x0, 0x0, 0x5F, 0x73, 0x61, 0x79, 
+                         0x41, 0x0, 0x5F, 0x73, 0x61, 0x79, 0x42, 0x0, 
+                         0x5F, 0x73, 0x61, 0x79, 0x44, 0x0, 0x5F, 0x73, 
+                         0x61, 0x79, 0x5A, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0 ]
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x100000C
+      cpusubtype:      0x80000002
+      filetype:        0x6
+      ncmds:           23
+      sizeofcmds:      1216
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         312
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          16384
+        fileoff:         0
+        filesize:        16384
+        maxprot:         5
+        initprot:        5
+        nsects:          3
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0x3F44
+            size:            36
+            offset:          0x3F44
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         7F2303D5FD7BBFA9FD03009106000094090000940C0000940F000094FD7BC1A8FF0F5FD6
+          - sectname:        __auth_stubs
+            segname:         __TEXT
+            addr:            0x3F68
+            size:            64
+            offset:          0x3F68
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0x10
+            reserved3:       0x0
+            content:         110000B031020091300240F9110A1FD7110000B031220091300240F9110A1FD7110000B031420091300240F9110A1FD7110000B031620091300240F9110A1FD7
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0x3FA8
+            size:            88
+            offset:          0x3FA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000443F00004000000040000000683F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000400000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          16384
+        vmsize:          16384
+        fileoff:         16384
+        filesize:        16384
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __auth_got
+            segname:         __DATA_CONST
+            addr:            0x4000
+            size:            32
+            offset:          0x4000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x4
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         00000000000009C001000000000009C002000000000009C003000000000001C0
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          32768
+        vmsize:          16384
+        fileoff:         32768
+        filesize:        720
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libC.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         32768
+        datasize:        120
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         32888
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          32920
+        nsyms:           5
+        stroff:          33032
+        strsize:         32
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       0
+        iextdefsym:      0
+        nextdefsym:      1
+        iundefsym:       1
+        nundefsym:       4
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  33000
+        nindirectsyms:   8
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            C1E8A3F5-14B1-3BF2-B737-18AB98364487
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libA.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libB.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libD.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libZ.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../A'
+        ZeroPadBytes:    3
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../B'
+        ZeroPadBytes:    3
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../D'
+        ZeroPadBytes:    3
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../Z'
+        ZeroPadBytes:    3
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         32912
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         32920
+        datasize:        0
+      - cmd:             LC_CODE_SIGNATURE
+        cmdsize:         16
+        dataoff:         33072
+        datasize:        416
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayC
+            Flags:           0x0
+            Address:         0x3F44
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         16196
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+        - n_strx:          14
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          512
+          n_value:         0
+        - n_strx:          20
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          768
+          n_value:         0
+        - n_strx:          26
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          1024
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayC
+        - _sayA
+        - _sayB
+        - _sayD
+        - _sayZ
+      IndirectSymbols: [ 0x1, 0x2, 0x3, 0x4, 0x1, 0x2, 0x3, 0x4 ]
+      FunctionStarts:  [ 0x3F44 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x58, 0x0, 0x0, 0x0, 0x4, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x40, 0xC, 0x0, 
+                         0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x2, 0x0, 
+                         0x0, 0x2, 0xE, 0x0, 0x0, 0x3, 0x1A, 0x0, 0x0, 
+                         0x4, 0x26, 0x0, 0x0, 0x0, 0x5F, 0x73, 0x61, 0x79, 
+                         0x41, 0x0, 0x5F, 0x73, 0x61, 0x79, 0x42, 0x0, 
+                         0x5F, 0x73, 0x61, 0x79, 0x44, 0x0, 0x5F, 0x73, 
+                         0x61, 0x79, 0x5A, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0 ]
+...
diff --git a/llvm/unittests/ExecutionEngine/Orc/Inputs/D/D_linux.yaml b/llvm/unittests/ExecutionEngine/Orc/Inputs/D/D_linux.yaml
new file mode 100644
index 0000000..f4f2f36
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/Orc/Inputs/D/D_linux.yaml
@@ -0,0 +1,479 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_DYN
+  Machine:         EM_X86_64
+ProgramHeaders:
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .rela.plt
+    Align:           0x1000
+    Offset:          0x0
+  - Type:            PT_LOAD
+    Flags:           [ PF_X, PF_R ]
+    FirstSec:        .init
+    LastSec:         .fini
+    VAddr:           0x1000
+    Align:           0x1000
+    Offset:          0x1000
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .rodata
+    LastSec:         .eh_frame
+    VAddr:           0x2000
+    Align:           0x1000
+    Offset:          0x2000
+  - Type:            PT_LOAD
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .bss
+    VAddr:           0x3DF0
+    Align:           0x1000
+    Offset:          0x2DF0
+  - Type:            PT_DYNAMIC
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .dynamic
+    LastSec:         .dynamic
+    VAddr:           0x3E00
+    Align:           0x8
+    Offset:          0x2E00
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .note.gnu.property
+    VAddr:           0x2A8
+    Align:           0x8
+    Offset:          0x2A8
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.build-id
+    LastSec:         .note.gnu.build-id
+    VAddr:           0x2C8
+    Align:           0x4
+    Offset:          0x2C8
+  - Type:            PT_GNU_PROPERTY
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .note.gnu.property
+    VAddr:           0x2A8
+    Align:           0x8
+    Offset:          0x2A8
+  - Type:            PT_GNU_EH_FRAME
+    Flags:           [ PF_R ]
+    FirstSec:        .eh_frame_hdr
+    LastSec:         .eh_frame_hdr
+    VAddr:           0x2010
+    Align:           0x4
+    Offset:          0x2010
+  - Type:            PT_GNU_STACK
+    Flags:           [ PF_W, PF_R ]
+    Align:           0x10
+    Offset:          0x0
+  - Type:            PT_GNU_RELRO
+    Flags:           [ PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .got
+    VAddr:           0x3DF0
+    Offset:          0x2DF0
+Sections:
+  - Name:            .note.gnu.property
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2A8
+    AddressAlign:    0x8
+    Notes:
+      - Name:            GNU
+        Desc:            020000C0040000000300000000000000
+        Type:            NT_GNU_PROPERTY_TYPE_0
+  - Name:            .note.gnu.build-id
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2C8
+    AddressAlign:    0x4
+    Notes:
+      - Name:            GNU
+        Desc:            6A0CC906C743C23E1400FDD239CF755466AB3E7B
+        Type:            NT_PRPSINFO
+  - Name:            .gnu.hash
+    Type:            SHT_GNU_HASH
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2F0
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Header:
+      SymNdx:          0x7
+      Shift2:          0x6
+    BloomFilter:     [ 0x400000400000 ]
+    HashBuckets:     [ 0x7, 0x0 ]
+    HashValues:      [ 0x7C9DCB97 ]
+  - Name:            .dynsym
+    Type:            SHT_DYNSYM
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x318
+    Link:            .dynstr
+    AddressAlign:    0x8
+  - Name:            .dynstr
+    Type:            SHT_STRTAB
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x3D8
+    AddressAlign:    0x1
+    Content:         "6C6962412E736F006C6962632E736F2E3600244F524947494E2F2E2E2F4100"
+  - Name:            .gnu.version
+    Type:            SHT_GNU_versym
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x468
+    Link:            .dynsym
+    AddressAlign:    0x2
+    Entries:         [ 0, 1, 2, 1, 1, 1, 2, 1 ]
+  - Name:            .gnu.version_r
+    Type:            SHT_GNU_verneed
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x478
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Dependencies:
+      - Version:         1
+        File:            libc.so.6
+        Entries:
+          - Name:            GLIBC_2.2.5
+            Hash:            157882997
+            Flags:           0
+            Other:           2
+  - Name:            .rela.dyn
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x498
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Relocations:
+      - Offset:          0x3DF0
+        Type:            R_X86_64_RELATIVE
+        Addend:          4400
+      - Offset:          0x3DF8
+        Type:            R_X86_64_RELATIVE
+        Addend:          4336
+      - Offset:          0x4028
+        Type:            R_X86_64_RELATIVE
+        Addend:          16424
+      - Offset:          0x3FE0
+        Symbol:          _ITM_deregisterTMCloneTable
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FE8
+        Symbol:          __gmon_start__
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FF0
+        Symbol:          _ITM_registerTMCloneTable
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FF8
+        Symbol:          __cxa_finalize
+        Type:            R_X86_64_GLOB_DAT
+  - Name:            .rela.plt
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC, SHF_INFO_LINK ]
+    Address:         0x540
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Info:            .got.plt
+    Relocations:
+      - Offset:          0x4018
+        Symbol:          puts
+        Type:            R_X86_64_JUMP_SLOT
+      - Offset:          0x4020
+        Symbol:          sayA
+        Type:            R_X86_64_JUMP_SLOT
+  - Name:            .init
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1000
+    AddressAlign:    0x4
+    Offset:          0x1000
+    Content:         F30F1EFA4883EC08488B05D92F00004885C07402FFD04883C408C3
+  - Name:            .plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1020
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         FF35E22F0000F2FF25E32F00000F1F00F30F1EFA6800000000F2E9E1FFFFFF90F30F1EFA6801000000F2E9D1FFFFFF90
+  - Name:            .plt.got
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1050
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         F30F1EFAF2FF259D2F00000F1F440000
+  - Name:            .plt.sec
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1060
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         F30F1EFAF2FF25AD2F00000F1F440000F30F1EFAF2FF25A52F00000F1F440000
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1080
+    AddressAlign:    0x10
+    Content:         488D3DA92F0000488D05A22F00004839F87415488B05462F00004885C07409FFE00F1F8000000000C30F1F8000000000488D3D792F0000488D35722F00004829FE4889F048C1EE3F48C1F8034801C648D1FE7414488B05152F00004885C07408FFE0660F1F440000C30F1F8000000000F30F1EFA803D352F000000752B5548833DF22E0000004889E5740C488B3D162F0000E839FFFFFFE864FFFFFFC6050D2F0000015DC30F1F00C30F1F8000000000F30F1EFAE977FFFFFFF30F1EFA554889E5B800000000E825FFFFFF905DC3F30F1EFA554889E5488D05A30E00004889C7E8FBFEFFFF905DC3
+  - Name:            .fini
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1168
+    AddressAlign:    0x4
+    Content:         F30F1EFA4883EC084883C408C3
+  - Name:            .rodata
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2000
+    AddressAlign:    0x1
+    Offset:          0x2000
+    Content:         48656C6C6F2066726F6D20442100
+  - Name:            .eh_frame_hdr
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2010
+    AddressAlign:    0x4
+    Content:         011B033B340000000500000010F0FFFF5000000040F0FFFF7800000050F0FFFF9000000029F1FFFFA80000003EF1FFFFC8000000
+  - Name:            .eh_frame
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2048
+    AddressAlign:    0x8
+    Content:         1400000000000000017A5200017810011B0C070890010000240000001C000000B8EFFFFF30000000000E10460E184A0F0B770880003F1A3A2A332422000000001400000044000000C0EFFFFF100000000000000000000000140000005C000000B8EFFFFF2000000000000000000000001C0000007400000079F0FFFF1500000000450E108602430D064C0C07080000001C000000940000006EF0FFFF1A00000000450E108602430D06510C070800000000000000
+  - Name:            .init_array
+    Type:            SHT_INIT_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3DF0
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Offset:          0x2DF0
+    Content:         '3011000000000000'
+  - Name:            .fini_array
+    Type:            SHT_FINI_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3DF8
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         F010000000000000
+  - Name:            .dynamic
+    Type:            SHT_DYNAMIC
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E00
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Entries:
+      - Tag:             DT_NEEDED
+        Value:           0x00
+      - Tag:             DT_NEEDED
+        Value:           0x08
+      - Tag:             DT_RUNPATH
+        Value:           0x12
+      - Tag:             DT_INIT
+        Value:           0x1000
+      - Tag:             DT_FINI
+        Value:           0x1168
+      - Tag:             DT_INIT_ARRAY
+        Value:           0x3DF0
+      - Tag:             DT_INIT_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_FINI_ARRAY
+        Value:           0x3DF8
+      - Tag:             DT_FINI_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_GNU_HASH
+        Value:           0x2F0
+      - Tag:             DT_STRTAB
+        Value:           0x3D8
+      - Tag:             DT_SYMTAB
+        Value:           0x318
+      - Tag:             DT_STRSZ
+        Value:           0x8F
+      - Tag:             DT_SYMENT
+        Value:           0x18
+      - Tag:             DT_PLTGOT
+        Value:           0x4000
+      - Tag:             DT_PLTRELSZ
+        Value:           0x30
+      - Tag:             DT_PLTREL
+        Value:           0x7
+      - Tag:             DT_JMPREL
+        Value:           0x540
+      - Tag:             DT_RELA
+        Value:           0x498
+      - Tag:             DT_RELASZ
+        Value:           0xA8
+      - Tag:             DT_RELAENT
+        Value:           0x18
+      - Tag:             DT_VERNEED
+        Value:           0x478
+      - Tag:             DT_VERNEEDNUM
+        Value:           0x1
+      - Tag:             DT_VERSYM
+        Value:           0x468
+      - Tag:             DT_RELACOUNT
+        Value:           0x3
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+  - Name:            .got
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3FE0
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '0000000000000000000000000000000000000000000000000000000000000000'
+  - Name:            .got.plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4000
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '003E0000000000000000000000000000000000000000000030100000000000004010000000000000'
+  - Name:            .data
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4028
+    AddressAlign:    0x8
+    Content:         '2840000000000000'
+  - Name:            .bss
+    Type:            SHT_NOBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4030
+    AddressAlign:    0x1
+    Size:            0x8
+  - Name:            .comment
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         4743433A20285562756E74752031312E342E302D317562756E7475317E32322E30342E32292031312E342E3000
+Symbols:
+  - Name:            crtstuff.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            deregister_tm_clones
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1080
+  - Name:            register_tm_clones
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x10B0
+  - Name:            __do_global_dtors_aux
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x10F0
+  - Name:            completed.0
+    Type:            STT_OBJECT
+    Section:         .bss
+    Value:           0x4030
+    Size:            0x1
+  - Name:            __do_global_dtors_aux_fini_array_entry
+    Type:            STT_OBJECT
+    Section:         .fini_array
+    Value:           0x3DF8
+  - Name:            frame_dummy
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1130
+  - Name:            __frame_dummy_init_array_entry
+    Type:            STT_OBJECT
+    Section:         .init_array
+    Value:           0x3DF0
+  - Name:            libD.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            keepSayA
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1139
+    Size:            0x15
+  - Name:            'crtstuff.c (1)'
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            __FRAME_END__
+    Type:            STT_OBJECT
+    Section:         .eh_frame
+    Value:           0x20F8
+  - Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            _fini
+    Type:            STT_FUNC
+    Section:         .fini
+    Value:           0x1168
+  - Name:            __dso_handle
+    Type:            STT_OBJECT
+    Section:         .data
+    Value:           0x4028
+  - Name:            _DYNAMIC
+    Type:            STT_OBJECT
+    Section:         .dynamic
+    Value:           0x3E00
+  - Name:            __GNU_EH_FRAME_HDR
+    Section:         .eh_frame_hdr
+    Value:           0x2010
+  - Name:            __TMC_END__
+    Type:            STT_OBJECT
+    Section:         .data
+    Value:           0x4030
+  - Name:            _GLOBAL_OFFSET_TABLE_
+    Type:            STT_OBJECT
+    Section:         .got.plt
+    Value:           0x4000
+  - Name:            _init
+    Type:            STT_FUNC
+    Section:         .init
+    Value:           0x1000
+  - Name:            _ITM_deregisterTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            'puts@GLIBC_2.2.5'
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            sayA
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            __gmon_start__
+    Binding:         STB_WEAK
+  - Name:            sayD
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x114E
+    Size:            0x1A
+  - Name:            _ITM_registerTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            '__cxa_finalize@GLIBC_2.2.5'
+    Type:            STT_FUNC
+    Binding:         STB_WEAK
+DynamicSymbols:
+  - Name:            _ITM_deregisterTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            puts
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            sayA
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            __gmon_start__
+    Binding:         STB_WEAK
+  - Name:            _ITM_registerTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            __cxa_finalize
+    Type:            STT_FUNC
+    Binding:         STB_WEAK
+  - Name:            sayD
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x114E
+    Size:            0x1A
+...
diff --git a/llvm/unittests/ExecutionEngine/Orc/Inputs/D/D_macho.yaml b/llvm/unittests/ExecutionEngine/Orc/Inputs/D/D_macho.yaml
new file mode 100644
index 0000000..1f80c1d
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/Orc/Inputs/D/D_macho.yaml
@@ -0,0 +1,801 @@
+--- !fat-mach-o
+FatHeader:
+  magic:           0xCAFEBABE
+  nfat_arch:       3
+FatArchs:
+  - cputype:         0x1000007
+    cpusubtype:      0x3
+    offset:          0x1000
+    size:            8432
+    align:           12
+  - cputype:         0x100000C
+    cpusubtype:      0x0
+    offset:          0x4000
+    size:            33424
+    align:           14
+  - cputype:         0x100000C
+    cpusubtype:      0x80000002
+    offset:          0x10000
+    size:            33424
+    align:           14
+Slices:
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x1000007
+      cpusubtype:      0x3
+      filetype:        0x6
+      ncmds:           16
+      sizeofcmds:      1040
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         392
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          4096
+        fileoff:         0
+        filesize:        4096
+        maxprot:         5
+        initprot:        5
+        nsects:          4
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0xF60
+            size:            36
+            offset:          0xF60
+            align:           4
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         554889E5B000E81F0000005DC30F1F00554889E5488D3D15000000B000E8020000005DC3
+          - sectname:        __stubs
+            segname:         __TEXT
+            addr:            0xF84
+            size:            12
+            offset:          0xF84
+            align:           1
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0x6
+            reserved3:       0x0
+            content:         FF2576000000FF2578000000
+          - sectname:        __cstring
+            segname:         __TEXT
+            addr:            0xF90
+            size:            15
+            offset:          0xF90
+            align:           0
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x2
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         48656C6C6F2066726F6D2044210A00
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0xFA0
+            size:            88
+            offset:          0xFA0
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000600F00004000000040000000840F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000100000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          4096
+        vmsize:          4096
+        fileoff:         4096
+        filesize:        4096
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __got
+            segname:         __DATA_CONST
+            addr:            0x1000
+            size:            16
+            offset:          0x1000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x2
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         '00000000000010800100000000000080'
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          8192
+        vmsize:          4096
+        fileoff:         8192
+        filesize:        240
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libD.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         8192
+        datasize:        96
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         8288
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          8320
+        nsyms:           4
+        stroff:          8400
+        strsize:         32
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       1
+        iextdefsym:      1
+        nextdefsym:      1
+        iundefsym:       2
+        nundefsym:       2
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  8384
+        nindirectsyms:   4
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            8B5D4A65-6C4F-3D34-9294-26E03CFBD3AE
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libA.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../A'
+        ZeroPadBytes:    3
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         8312
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         8320
+        datasize:        0
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayD
+            Flags:           0x0
+            Address:         0xF70
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          22
+          n_type:          0xE
+          n_sect:          1
+          n_desc:          0
+          n_value:         3936
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         3952
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          512
+          n_value:         0
+        - n_strx:          16
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayD
+        - _printf
+        - _sayA
+        - _keepSayA
+      IndirectSymbols: [ 0x2, 0x3, 0x2, 0x3 ]
+      FunctionStarts:  [ 0xF60, 0xF70 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x50, 0x0, 0x0, 0x0, 0x2, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x10, 0x6, 0x0, 
+                         0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x2, 0x2, 0x0, 
+                         0x0, 0x1, 0x12, 0x0, 0x0, 0x0, 0x5F, 0x70, 0x72, 
+                         0x69, 0x6E, 0x74, 0x66, 0x0, 0x5F, 0x73, 0x61, 
+                         0x79, 0x41, 0x0, 0x0 ]
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x100000C
+      cpusubtype:      0x0
+      filetype:        0x6
+      ncmds:           17
+      sizeofcmds:      1056
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         392
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          16384
+        fileoff:         0
+        filesize:        16384
+        maxprot:         5
+        initprot:        5
+        nsects:          4
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0x3F50
+            size:            48
+            offset:          0x3F50
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         FD7BBFA9FD0300910D000094FD7BC1A8C0035FD6FD7BBFA9FD0300910000009000603E9103000094FD7BC1A8C0035FD6
+          - sectname:        __stubs
+            segname:         __TEXT
+            addr:            0x3F80
+            size:            24
+            offset:          0x3F80
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0xC
+            reserved3:       0x0
+            content:         100000B0100240F900021FD6100000B0100640F900021FD6
+          - sectname:        __cstring
+            segname:         __TEXT
+            addr:            0x3F98
+            size:            15
+            offset:          0x3F98
+            align:           0
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x2
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         48656C6C6F2066726F6D2044210A00
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0x3FA8
+            size:            88
+            offset:          0x3FA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000503F00004000000040000000803F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000400000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          16384
+        vmsize:          16384
+        fileoff:         16384
+        filesize:        16384
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __got
+            segname:         __DATA_CONST
+            addr:            0x4000
+            size:            16
+            offset:          0x4000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x2
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         '00000000000010800100000000000080'
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          32768
+        vmsize:          16384
+        fileoff:         32768
+        filesize:        656
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libD.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         32768
+        datasize:        96
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         32864
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          32896
+        nsyms:           4
+        stroff:          32976
+        strsize:         32
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       1
+        iextdefsym:      1
+        nextdefsym:      1
+        iundefsym:       2
+        nundefsym:       2
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  32960
+        nindirectsyms:   4
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            5898A6CE-0F78-3CA2-8F7D-B1AAAF26C49F
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libA.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../A'
+        ZeroPadBytes:    3
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         32888
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         32896
+        datasize:        0
+      - cmd:             LC_CODE_SIGNATURE
+        cmdsize:         16
+        dataoff:         33008
+        datasize:        416
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayD
+            Flags:           0x0
+            Address:         0x3F64
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          22
+          n_type:          0xE
+          n_sect:          1
+          n_desc:          0
+          n_value:         16208
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         16228
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          512
+          n_value:         0
+        - n_strx:          16
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayD
+        - _printf
+        - _sayA
+        - _keepSayA
+      IndirectSymbols: [ 0x2, 0x3, 0x2, 0x3 ]
+      FunctionStarts:  [ 0x3F50, 0x3F64 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x50, 0x0, 0x0, 0x0, 0x2, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x40, 0x6, 0x0, 
+                         0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x2, 0x2, 0x0, 
+                         0x0, 0x1, 0x12, 0x0, 0x0, 0x0, 0x5F, 0x70, 0x72, 
+                         0x69, 0x6E, 0x74, 0x66, 0x0, 0x5F, 0x73, 0x61, 
+                         0x79, 0x41, 0x0, 0x0 ]
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x100000C
+      cpusubtype:      0x80000002
+      filetype:        0x6
+      ncmds:           17
+      sizeofcmds:      1056
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         392
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          16384
+        fileoff:         0
+        filesize:        16384
+        maxprot:         5
+        initprot:        5
+        nsects:          4
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0x3F40
+            size:            56
+            offset:          0x3F40
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         7F2303D5FD7BBFA9FD0300910F000094FD7BC1A8FF0F5FD67F2303D5FD7BBFA9FD0300910000009000603E9103000094FD7BC1A8FF0F5FD6
+          - sectname:        __auth_stubs
+            segname:         __TEXT
+            addr:            0x3F78
+            size:            32
+            offset:          0x3F78
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0x10
+            reserved3:       0x0
+            content:         110000B031020091300240F9110A1FD7110000B031220091300240F9110A1FD7
+          - sectname:        __cstring
+            segname:         __TEXT
+            addr:            0x3F98
+            size:            15
+            offset:          0x3F98
+            align:           0
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x2
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         48656C6C6F2066726F6D2044210A00
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0x3FA8
+            size:            88
+            offset:          0x3FA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000403F00004000000040000000783F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000400000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          16384
+        vmsize:          16384
+        fileoff:         16384
+        filesize:        16384
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __auth_got
+            segname:         __DATA_CONST
+            addr:            0x4000
+            size:            16
+            offset:          0x4000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x2
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         00000000000009C001000000000001C0
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          32768
+        vmsize:          16384
+        fileoff:         32768
+        filesize:        656
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libD.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         32768
+        datasize:        96
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         32864
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          32896
+        nsyms:           4
+        stroff:          32976
+        strsize:         32
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       1
+        iextdefsym:      1
+        nextdefsym:      1
+        iundefsym:       2
+        nundefsym:       2
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  32960
+        nindirectsyms:   4
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            81A288C4-6F51-3913-9330-EDE155D1DD35
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libA.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_RPATH
+        cmdsize:         32
+        path:            12
+        Content:         '@loader_path/../A'
+        ZeroPadBytes:    3
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         32888
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         32896
+        datasize:        0
+      - cmd:             LC_CODE_SIGNATURE
+        cmdsize:         16
+        dataoff:         33008
+        datasize:        416
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayD
+            Flags:           0x0
+            Address:         0x3F58
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          22
+          n_type:          0xE
+          n_sect:          1
+          n_desc:          0
+          n_value:         16192
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         16216
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          512
+          n_value:         0
+        - n_strx:          16
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayD
+        - _printf
+        - _sayA
+        - _keepSayA
+      IndirectSymbols: [ 0x2, 0x3, 0x2, 0x3 ]
+      FunctionStarts:  [ 0x3F40, 0x3F58 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x50, 0x0, 0x0, 0x0, 0x2, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x40, 0xC, 0x0, 
+                         0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x2, 0x2, 0x0, 
+                         0x0, 0x1, 0x12, 0x0, 0x0, 0x0, 0x5F, 0x70, 0x72, 
+                         0x69, 0x6E, 0x74, 0x66, 0x0, 0x5F, 0x73, 0x61, 
+                         0x79, 0x41, 0x0, 0x0 ]
+...
diff --git a/llvm/unittests/ExecutionEngine/Orc/Inputs/Z/Z_linux.yaml b/llvm/unittests/ExecutionEngine/Orc/Inputs/Z/Z_linux.yaml
new file mode 100644
index 0000000..5561f29
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/Orc/Inputs/Z/Z_linux.yaml
@@ -0,0 +1,460 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_DYN
+  Machine:         EM_X86_64
+ProgramHeaders:
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .rela.plt
+    Align:           0x1000
+    Offset:          0x0
+  - Type:            PT_LOAD
+    Flags:           [ PF_X, PF_R ]
+    FirstSec:        .init
+    LastSec:         .fini
+    VAddr:           0x1000
+    Align:           0x1000
+    Offset:          0x1000
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .rodata
+    LastSec:         .eh_frame
+    VAddr:           0x2000
+    Align:           0x1000
+    Offset:          0x2000
+  - Type:            PT_LOAD
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .bss
+    VAddr:           0x3E10
+    Align:           0x1000
+    Offset:          0x2E10
+  - Type:            PT_DYNAMIC
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .dynamic
+    LastSec:         .dynamic
+    VAddr:           0x3E20
+    Align:           0x8
+    Offset:          0x2E20
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .note.gnu.property
+    VAddr:           0x2A8
+    Align:           0x8
+    Offset:          0x2A8
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.build-id
+    LastSec:         .note.gnu.build-id
+    VAddr:           0x2C8
+    Align:           0x4
+    Offset:          0x2C8
+  - Type:            PT_GNU_PROPERTY
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .note.gnu.property
+    VAddr:           0x2A8
+    Align:           0x8
+    Offset:          0x2A8
+  - Type:            PT_GNU_EH_FRAME
+    Flags:           [ PF_R ]
+    FirstSec:        .eh_frame_hdr
+    LastSec:         .eh_frame_hdr
+    VAddr:           0x2010
+    Align:           0x4
+    Offset:          0x2010
+  - Type:            PT_GNU_STACK
+    Flags:           [ PF_W, PF_R ]
+    Align:           0x10
+    Offset:          0x0
+  - Type:            PT_GNU_RELRO
+    Flags:           [ PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .got
+    VAddr:           0x3E10
+    Offset:          0x2E10
+Sections:
+  - Name:            .note.gnu.property
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2A8
+    AddressAlign:    0x8
+    Notes:
+      - Name:            GNU
+        Desc:            020000C0040000000300000000000000
+        Type:            NT_GNU_PROPERTY_TYPE_0
+  - Name:            .note.gnu.build-id
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2C8
+    AddressAlign:    0x4
+    Notes:
+      - Name:            GNU
+        Desc:            640A4A3AC0DF6BA3DAC3B51CCD727245117E0B30
+        Type:            NT_PRPSINFO
+  - Name:            .gnu.hash
+    Type:            SHT_GNU_HASH
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2F0
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Header:
+      SymNdx:          0x6
+      Shift2:          0x6
+    BloomFilter:     [ 0x500000000000 ]
+    HashBuckets:     [ 0x6, 0x0 ]
+    HashValues:      [ 0x7C9DCBAD ]
+  - Name:            .dynsym
+    Type:            SHT_DYNSYM
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x318
+    Link:            .dynstr
+    AddressAlign:    0x8
+  - Name:            .dynstr
+    Type:            SHT_STRTAB
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x3C0
+    AddressAlign:    0x1
+  - Name:            .gnu.version
+    Type:            SHT_GNU_versym
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x436
+    Link:            .dynsym
+    AddressAlign:    0x2
+    Entries:         [ 0, 1, 2, 1, 1, 2, 1 ]
+  - Name:            .gnu.version_r
+    Type:            SHT_GNU_verneed
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x448
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Dependencies:
+      - Version:         1
+        File:            libc.so.6
+        Entries:
+          - Name:            GLIBC_2.2.5
+            Hash:            157882997
+            Flags:           0
+            Other:           2
+  - Name:            .rela.dyn
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x468
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Relocations:
+      - Offset:          0x3E10
+        Type:            R_X86_64_RELATIVE
+        Addend:          4368
+      - Offset:          0x3E18
+        Type:            R_X86_64_RELATIVE
+        Addend:          4304
+      - Offset:          0x4020
+        Type:            R_X86_64_RELATIVE
+        Addend:          16416
+      - Offset:          0x3FE0
+        Symbol:          _ITM_deregisterTMCloneTable
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FE8
+        Symbol:          __gmon_start__
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FF0
+        Symbol:          _ITM_registerTMCloneTable
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x3FF8
+        Symbol:          __cxa_finalize
+        Type:            R_X86_64_GLOB_DAT
+  - Name:            .rela.plt
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC, SHF_INFO_LINK ]
+    Address:         0x510
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Info:            .got.plt
+    Relocations:
+      - Offset:          0x4018
+        Symbol:          puts
+        Type:            R_X86_64_JUMP_SLOT
+  - Name:            .init
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1000
+    AddressAlign:    0x4
+    Offset:          0x1000
+    Content:         F30F1EFA4883EC08488B05D92F00004885C07402FFD04883C408C3
+  - Name:            .plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1020
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         FF35E22F0000F2FF25E32F00000F1F00F30F1EFA6800000000F2E9E1FFFFFF90
+  - Name:            .plt.got
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1040
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         F30F1EFAF2FF25AD2F00000F1F440000
+  - Name:            .plt.sec
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1050
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         F30F1EFAF2FF25BD2F00000F1F440000
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1060
+    AddressAlign:    0x10
+    Content:         488D3DC12F0000488D05BA2F00004839F87415488B05662F00004885C07409FFE00F1F8000000000C30F1F8000000000488D3D912F0000488D358A2F00004829FE4889F048C1EE3F48C1F8034801C648D1FE7414488B05352F00004885C07408FFE0660F1F440000C30F1F8000000000F30F1EFA803D4D2F000000752B5548833D122F0000004889E5740C488B3D2E2F0000E849FFFFFFE864FFFFFFC605252F0000015DC30F1F00C30F1F8000000000F30F1EFAE977FFFFFFF30F1EFA554889E5488D05D80E00004889C7E820FFFFFF905DC3
+  - Name:            .fini
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x1134
+    AddressAlign:    0x4
+    Content:         F30F1EFA4883EC084883C408C3
+  - Name:            .rodata
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2000
+    AddressAlign:    0x1
+    Offset:          0x2000
+    Content:         48656C6C6F2066726F6D205A00
+  - Name:            .eh_frame_hdr
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2010
+    AddressAlign:    0x4
+    Content:         011B033B2C0000000400000010F0FFFF4800000030F0FFFF7000000040F0FFFF8800000009F1FFFFA0000000
+  - Name:            .eh_frame
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2040
+    AddressAlign:    0x8
+    Content:         1400000000000000017A5200017810011B0C070890010000240000001C000000C0EFFFFF20000000000E10460E184A0F0B770880003F1A3A2A332422000000001400000044000000B8EFFFFF100000000000000000000000140000005C000000B0EFFFFF1000000000000000000000001C0000007400000061F0FFFF1A00000000450E108602430D06510C070800000000000000
+  - Name:            .init_array
+    Type:            SHT_INIT_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E10
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Offset:          0x2E10
+    Content:         '1011000000000000'
+  - Name:            .fini_array
+    Type:            SHT_FINI_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E18
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         D010000000000000
+  - Name:            .dynamic
+    Type:            SHT_DYNAMIC
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3E20
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Entries:
+      - Tag:             DT_NEEDED
+        Value:           0x5F
+      - Tag:             DT_INIT
+        Value:           0x1000
+      - Tag:             DT_FINI
+        Value:           0x1134
+      - Tag:             DT_INIT_ARRAY
+        Value:           0x3E10
+      - Tag:             DT_INIT_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_FINI_ARRAY
+        Value:           0x3E18
+      - Tag:             DT_FINI_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_GNU_HASH
+        Value:           0x2F0
+      - Tag:             DT_STRTAB
+        Value:           0x3C0
+      - Tag:             DT_SYMTAB
+        Value:           0x318
+      - Tag:             DT_STRSZ
+        Value:           0x75
+      - Tag:             DT_SYMENT
+        Value:           0x18
+      - Tag:             DT_PLTGOT
+        Value:           0x4000
+      - Tag:             DT_PLTRELSZ
+        Value:           0x18
+      - Tag:             DT_PLTREL
+        Value:           0x7
+      - Tag:             DT_JMPREL
+        Value:           0x510
+      - Tag:             DT_RELA
+        Value:           0x468
+      - Tag:             DT_RELASZ
+        Value:           0xA8
+      - Tag:             DT_RELAENT
+        Value:           0x18
+      - Tag:             DT_VERNEED
+        Value:           0x448
+      - Tag:             DT_VERNEEDNUM
+        Value:           0x1
+      - Tag:             DT_VERSYM
+        Value:           0x436
+      - Tag:             DT_RELACOUNT
+        Value:           0x3
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+  - Name:            .got
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3FE0
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '0000000000000000000000000000000000000000000000000000000000000000'
+  - Name:            .got.plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4000
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '203E000000000000000000000000000000000000000000003010000000000000'
+  - Name:            .data
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4020
+    AddressAlign:    0x8
+    Content:         '2040000000000000'
+  - Name:            .bss
+    Type:            SHT_NOBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x4028
+    AddressAlign:    0x1
+    Size:            0x8
+  - Name:            .comment
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         4743433A20285562756E74752031312E342E302D317562756E7475317E32322E30342E32292031312E342E3000
+Symbols:
+  - Name:            crtstuff.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            deregister_tm_clones
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1060
+  - Name:            register_tm_clones
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1090
+  - Name:            __do_global_dtors_aux
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x10D0
+  - Name:            completed.0
+    Type:            STT_OBJECT
+    Section:         .bss
+    Value:           0x4028
+    Size:            0x1
+  - Name:            __do_global_dtors_aux_fini_array_entry
+    Type:            STT_OBJECT
+    Section:         .fini_array
+    Value:           0x3E18
+  - Name:            frame_dummy
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x1110
+  - Name:            __frame_dummy_init_array_entry
+    Type:            STT_OBJECT
+    Section:         .init_array
+    Value:           0x3E10
+  - Name:            libZ.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            'crtstuff.c (1)'
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            __FRAME_END__
+    Type:            STT_OBJECT
+    Section:         .eh_frame
+    Value:           0x20D0
+  - Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            _fini
+    Type:            STT_FUNC
+    Section:         .fini
+    Value:           0x1134
+  - Name:            __dso_handle
+    Type:            STT_OBJECT
+    Section:         .data
+    Value:           0x4020
+  - Name:            _DYNAMIC
+    Type:            STT_OBJECT
+    Section:         .dynamic
+    Value:           0x3E20
+  - Name:            __GNU_EH_FRAME_HDR
+    Section:         .eh_frame_hdr
+    Value:           0x2010
+  - Name:            __TMC_END__
+    Type:            STT_OBJECT
+    Section:         .data
+    Value:           0x4028
+  - Name:            _GLOBAL_OFFSET_TABLE_
+    Type:            STT_OBJECT
+    Section:         .got.plt
+    Value:           0x4000
+  - Name:            _init
+    Type:            STT_FUNC
+    Section:         .init
+    Value:           0x1000
+  - Name:            _ITM_deregisterTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            'puts@GLIBC_2.2.5'
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            __gmon_start__
+    Binding:         STB_WEAK
+  - Name:            sayZ
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x1119
+    Size:            0x1A
+  - Name:            _ITM_registerTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            '__cxa_finalize@GLIBC_2.2.5'
+    Type:            STT_FUNC
+    Binding:         STB_WEAK
+DynamicSymbols:
+  - Name:            _ITM_deregisterTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            puts
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            __gmon_start__
+    Binding:         STB_WEAK
+  - Name:            _ITM_registerTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            __cxa_finalize
+    Type:            STT_FUNC
+    Binding:         STB_WEAK
+  - Name:            sayZ
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x1119
+    Size:            0x1A
+...
diff --git a/llvm/unittests/ExecutionEngine/Orc/Inputs/Z/Z_macho.yaml b/llvm/unittests/ExecutionEngine/Orc/Inputs/Z/Z_macho.yaml
new file mode 100644
index 0000000..c0c1826
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/Orc/Inputs/Z/Z_macho.yaml
@@ -0,0 +1,723 @@
+--- !fat-mach-o
+FatHeader:
+  magic:           0xCAFEBABE
+  nfat_arch:       3
+FatArchs:
+  - cputype:         0x1000007
+    cpusubtype:      0x3
+    offset:          0x1000
+    size:            8376
+    align:           12
+  - cputype:         0x100000C
+    cpusubtype:      0x0
+    offset:          0x4000
+    size:            33376
+    align:           14
+  - cputype:         0x100000C
+    cpusubtype:      0x80000002
+    offset:          0x10000
+    size:            33376
+    align:           14
+Slices:
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x1000007
+      cpusubtype:      0x3
+      filetype:        0x6
+      ncmds:           14
+      sizeofcmds:      960
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         392
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          4096
+        fileoff:         0
+        filesize:        4096
+        maxprot:         5
+        initprot:        5
+        nsects:          4
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0xF80
+            size:            20
+            offset:          0xF80
+            align:           4
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         554889E5488D3D0F000000B000E8020000005DC3
+          - sectname:        __stubs
+            segname:         __TEXT
+            addr:            0xF94
+            size:            6
+            offset:          0xF94
+            align:           1
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0x6
+            reserved3:       0x0
+            content:         FF2566000000
+          - sectname:        __cstring
+            segname:         __TEXT
+            addr:            0xF9A
+            size:            14
+            offset:          0xF9A
+            align:           0
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x2
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         48656C6C6F2066726F6D205A0A00
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0xFA8
+            size:            88
+            offset:          0xFA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000800F00004000000040000000940F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000100000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          4096
+        vmsize:          4096
+        fileoff:         4096
+        filesize:        4096
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __got
+            segname:         __DATA_CONST
+            addr:            0x1000
+            size:            8
+            offset:          0x1000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x1
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         '0000000000000080'
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          8192
+        vmsize:          4096
+        fileoff:         8192
+        filesize:        184
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libZ.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         8192
+        datasize:        96
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         8288
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          8320
+        nsyms:           2
+        stroff:          8360
+        strsize:         16
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       0
+        iextdefsym:      0
+        nextdefsym:      1
+        iundefsym:       1
+        nundefsym:       1
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  8352
+        nindirectsyms:   2
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            399E203C-FF9A-3B80-872C-85F3A759A78B
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         8312
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         8320
+        datasize:        0
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayZ
+            Flags:           0x0
+            Address:         0xF80
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         3968
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayZ
+        - _printf
+      IndirectSymbols: [ 0x1, 0x1 ]
+      FunctionStarts:  [ 0xF80 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x50, 0x0, 0x0, 0x0, 0x1, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x10, 0x6, 0x0, 
+                         0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x2, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5F, 0x70, 0x72, 
+                         0x69, 0x6E, 0x74, 0x66, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0 ]
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x100000C
+      cpusubtype:      0x0
+      filetype:        0x6
+      ncmds:           15
+      sizeofcmds:      976
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         392
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          16384
+        fileoff:         0
+        filesize:        16384
+        maxprot:         5
+        initprot:        5
+        nsects:          4
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0x3F70
+            size:            28
+            offset:          0x3F70
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         FD7BBFA9FD0300910000009000603E9103000094FD7BC1A8C0035FD6
+          - sectname:        __stubs
+            segname:         __TEXT
+            addr:            0x3F8C
+            size:            12
+            offset:          0x3F8C
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0xC
+            reserved3:       0x0
+            content:         100000B0100240F900021FD6
+          - sectname:        __cstring
+            segname:         __TEXT
+            addr:            0x3F98
+            size:            14
+            offset:          0x3F98
+            align:           0
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x2
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         48656C6C6F2066726F6D205A0A00
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0x3FA8
+            size:            88
+            offset:          0x3FA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000703F000040000000400000008C3F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000400000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          16384
+        vmsize:          16384
+        fileoff:         16384
+        filesize:        16384
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __got
+            segname:         __DATA_CONST
+            addr:            0x4000
+            size:            8
+            offset:          0x4000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x1
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         '0000000000000080'
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          32768
+        vmsize:          16384
+        fileoff:         32768
+        filesize:        608
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libZ.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         32768
+        datasize:        96
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         32864
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          32896
+        nsyms:           2
+        stroff:          32936
+        strsize:         16
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       0
+        iextdefsym:      0
+        nextdefsym:      1
+        iundefsym:       1
+        nundefsym:       1
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  32928
+        nindirectsyms:   2
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            6E8E78AF-EDB2-3830-BE1E-013390302CC5
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         32888
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         32896
+        datasize:        0
+      - cmd:             LC_CODE_SIGNATURE
+        cmdsize:         16
+        dataoff:         32960
+        datasize:        416
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayZ
+            Flags:           0x0
+            Address:         0x3F70
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         16240
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayZ
+        - _printf
+      IndirectSymbols: [ 0x1, 0x1 ]
+      FunctionStarts:  [ 0x3F70 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x50, 0x0, 0x0, 0x0, 0x1, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x40, 0x6, 0x0, 
+                         0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x2, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5F, 0x70, 0x72, 
+                         0x69, 0x6E, 0x74, 0x66, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0 ]
+  - !mach-o
+    FileHeader:
+      magic:           0xFEEDFACF
+      cputype:         0x100000C
+      cpusubtype:      0x80000002
+      filetype:        0x6
+      ncmds:           15
+      sizeofcmds:      976
+      flags:           0x100085
+      reserved:        0x0
+    LoadCommands:
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         392
+        segname:         __TEXT
+        vmaddr:          0
+        vmsize:          16384
+        fileoff:         0
+        filesize:        16384
+        maxprot:         5
+        initprot:        5
+        nsects:          4
+        flags:           0
+        Sections:
+          - sectname:        __text
+            segname:         __TEXT
+            addr:            0x3F68
+            size:            32
+            offset:          0x3F68
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000400
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         7F2303D5FD7BBFA9FD0300910000009000603E9103000094FD7BC1A8FF0F5FD6
+          - sectname:        __auth_stubs
+            segname:         __TEXT
+            addr:            0x3F88
+            size:            16
+            offset:          0x3F88
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x80000408
+            reserved1:       0x0
+            reserved2:       0x10
+            reserved3:       0x0
+            content:         110000B031020091300240F9110A1FD7
+          - sectname:        __cstring
+            segname:         __TEXT
+            addr:            0x3F98
+            size:            14
+            offset:          0x3F98
+            align:           0
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x2
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         48656C6C6F2066726F6D205A0A00
+          - sectname:        __unwind_info
+            segname:         __TEXT
+            addr:            0x3FA8
+            size:            88
+            offset:          0x3FA8
+            align:           2
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x0
+            reserved1:       0x0
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         010000001C000000000000001C000000000000001C00000002000000683F00004000000040000000883F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000400000000
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         152
+        segname:         __DATA_CONST
+        vmaddr:          16384
+        vmsize:          16384
+        fileoff:         16384
+        filesize:        16384
+        maxprot:         3
+        initprot:        3
+        nsects:          1
+        flags:           16
+        Sections:
+          - sectname:        __auth_got
+            segname:         __DATA_CONST
+            addr:            0x4000
+            size:            8
+            offset:          0x4000
+            align:           3
+            reloff:          0x0
+            nreloc:          0
+            flags:           0x6
+            reserved1:       0x1
+            reserved2:       0x0
+            reserved3:       0x0
+            content:         00000000000001C0
+      - cmd:             LC_SEGMENT_64
+        cmdsize:         72
+        segname:         __LINKEDIT
+        vmaddr:          32768
+        vmsize:          16384
+        fileoff:         32768
+        filesize:        608
+        maxprot:         1
+        initprot:        1
+        nsects:          0
+        flags:           0
+      - cmd:             LC_ID_DYLIB
+        cmdsize:         48
+        dylib:
+          name:            24
+          timestamp:       1
+          current_version: 0
+          compatibility_version: 0
+        Content:         '@rpath/libZ.dylib'
+        ZeroPadBytes:    7
+      - cmd:             LC_DYLD_CHAINED_FIXUPS
+        cmdsize:         16
+        dataoff:         32768
+        datasize:        96
+      - cmd:             LC_DYLD_EXPORTS_TRIE
+        cmdsize:         16
+        dataoff:         32864
+        datasize:        24
+      - cmd:             LC_SYMTAB
+        cmdsize:         24
+        symoff:          32896
+        nsyms:           2
+        stroff:          32936
+        strsize:         16
+      - cmd:             LC_DYSYMTAB
+        cmdsize:         80
+        ilocalsym:       0
+        nlocalsym:       0
+        iextdefsym:      0
+        nextdefsym:      1
+        iundefsym:       1
+        nundefsym:       1
+        tocoff:          0
+        ntoc:            0
+        modtaboff:       0
+        nmodtab:         0
+        extrefsymoff:    0
+        nextrefsyms:     0
+        indirectsymoff:  32928
+        nindirectsyms:   2
+        extreloff:       0
+        nextrel:         0
+        locreloff:       0
+        nlocrel:         0
+      - cmd:             LC_UUID
+        cmdsize:         24
+        uuid:            E74F368D-238F-31FA-BF40-FA2964FED986
+      - cmd:             LC_BUILD_VERSION
+        cmdsize:         32
+        platform:        1
+        minos:           983040
+        sdk:             983552
+        ntools:          1
+        Tools:
+          - tool:            3
+            version:         73074435
+      - cmd:             LC_SOURCE_VERSION
+        cmdsize:         16
+        version:         0
+      - cmd:             LC_LOAD_DYLIB
+        cmdsize:         56
+        dylib:
+          name:            24
+          timestamp:       2
+          current_version: 88539136
+          compatibility_version: 65536
+        Content:         '/usr/lib/libSystem.B.dylib'
+        ZeroPadBytes:    6
+      - cmd:             LC_FUNCTION_STARTS
+        cmdsize:         16
+        dataoff:         32888
+        datasize:        8
+      - cmd:             LC_DATA_IN_CODE
+        cmdsize:         16
+        dataoff:         32896
+        datasize:        0
+      - cmd:             LC_CODE_SIGNATURE
+        cmdsize:         16
+        dataoff:         32960
+        datasize:        416
+    LinkEditData:
+      ExportTrie:
+        TerminalSize:    0
+        NodeOffset:      0
+        Name:            ''
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    3
+            NodeOffset:      13
+            Name:            _sayZ
+            Flags:           0x0
+            Address:         0x3F68
+            Other:           0x0
+            ImportName:      ''
+      NameList:
+        - n_strx:          2
+          n_type:          0xF
+          n_sect:          1
+          n_desc:          0
+          n_value:         16232
+        - n_strx:          8
+          n_type:          0x1
+          n_sect:          0
+          n_desc:          256
+          n_value:         0
+      StringTable:
+        - ' '
+        - _sayZ
+        - _printf
+      IndirectSymbols: [ 0x1, 0x1 ]
+      FunctionStarts:  [ 0x3F68 ]
+      ChainedFixups:   [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x48, 
+                         0x0, 0x0, 0x0, 0x50, 0x0, 0x0, 0x0, 0x1, 0x0, 
+                         0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x18, 0x0, 0x0, 0x0, 0x0, 0x40, 0xC, 0x0, 
+                         0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x2, 0x0, 
+                         0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5F, 0x70, 0x72, 
+                         0x69, 0x6E, 0x74, 0x66, 0x0, 0x0, 0x0, 0x0, 0x0, 
+                         0x0, 0x0, 0x0 ]
+...
diff --git a/llvm/unittests/ExecutionEngine/Orc/LibraryResolverTest.cpp b/llvm/unittests/ExecutionEngine/Orc/LibraryResolverTest.cpp
new file mode 100644
index 0000000..f6990ee
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/Orc/LibraryResolverTest.cpp
@@ -0,0 +1,896 @@
+//===- LibraryResolverTest.cpp - Unit tests for LibraryResolver -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h"
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h"
+#include "llvm/ObjectYAML/MachOYAML.h"
+#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "llvm/Testing/Support/SupportHelpers.h"
+
+#include "gtest/gtest.h"
+
+#include <algorithm>
+#include <optional>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::orc;
+
+#if defined(__APPLE__) || defined(__linux__)
+// TODO: Add COFF (Windows) support for these tests.
+// this facility also works correctly on Windows (COFF),
+// so we should eventually enable and run these tests for that platform as well.
+namespace {
+
+#if defined(__APPLE__)
+constexpr const char *ext = ".dylib";
+#elif defined(_WIN32)
+constexpr const char *ext = ".dll";
+#else
+constexpr const char *ext = ".so";
+#endif
+
+bool EnvReady = false;
+
+Triple getTargetTriple() {
+  auto JTMB = JITTargetMachineBuilder::detectHost();
+  if (!JTMB) {
+    consumeError(JTMB.takeError());
+    return Triple();
+  }
+  return JTMB->getTargetTriple();
+}
+
+static bool CheckHostSupport() {
+  auto Triple = getTargetTriple();
+  // TODO: Extend support to COFF (Windows) once test setup and YAML conversion
+  // are verified.
+  if (!Triple.isOSBinFormatMachO() &&
+      !(Triple.isOSBinFormatELF() && Triple.getArch() == Triple::x86_64))
+    return false;
+
+  return true;
+}
+
+std::string getYamlFilePlatformExt() {
+  auto Triple = getTargetTriple();
+  if (Triple.isOSBinFormatMachO())
+    return "_macho";
+  else if (Triple.isOSBinFormatELF())
+    return "_linux";
+
+  return "";
+}
+
+unsigned getYamlDocNum() {
+  // auto Triple = getTargetTriple();
+  // if (Triple.isOSBinFormatELF())
+  //   return 1;
+
+  return 1;
+}
+
+class LibraryTestEnvironment : public ::testing::Environment {
+  std::vector<std::string> CreatedDylibsDir;
+  std::vector<std::string> CreatedDylibs;
+  SmallVector<char, 128> DirPath;
+
+public:
+  void SetUp() override {
+    if (!CheckHostSupport()) {
+      EnvReady = false;
+      return;
+    }
+
+    StringRef ThisFile = __FILE__;
+    SmallVector<char, 128> InputDirPath(ThisFile.begin(), ThisFile.end());
+    sys::path::remove_filename(InputDirPath);
+    sys::path::append(InputDirPath, "Inputs");
+    if (!sys::fs::exists(InputDirPath))
+      return;
+
+    SmallString<128> UniqueDir;
+    sys::path::append(UniqueDir, InputDirPath);
+    std::error_code EC = sys::fs::createUniqueDirectory(UniqueDir, DirPath);
+
+    if (EC)
+      return;
+
+    // given yamlPath + DylibPath, validate + convert
+    auto processYamlToDylib = [&](const SmallVector<char, 128> &YamlPath,
+                                  const SmallVector<char, 128> &DylibPath,
+                                  unsigned DocNum) -> bool {
+      if (!sys::fs::exists(YamlPath)) {
+        errs() << "YAML file missing: "
+               << StringRef(YamlPath.data(), YamlPath.size()) << "\n";
+        EnvReady = false;
+        return false;
+      }
+
+      auto BufOrErr = MemoryBuffer::getFile(YamlPath);
+      if (!BufOrErr) {
+        errs() << "Failed to read "
+               << StringRef(YamlPath.data(), YamlPath.size()) << ": "
+               << BufOrErr.getError().message() << "\n";
+        EnvReady = false;
+        return false;
+      }
+
+      yaml::Input yin(BufOrErr->get()->getBuffer());
+      std::error_code EC;
+      raw_fd_ostream outFile(StringRef(DylibPath.data(), DylibPath.size()), EC,
+                             sys::fs::OF_None);
+
+      if (EC) {
+        errs() << "Failed to open "
+               << StringRef(DylibPath.data(), DylibPath.size())
+               << " for writing: " << EC.message() << "\n";
+        EnvReady = false;
+        return false;
+      }
+
+      if (!yaml::convertYAML(
+              yin, outFile,
+              [](const Twine &M) {
+                // Handle or ignore errors here
+                errs() << "Yaml Error :" << M << "\n";
+              },
+              DocNum)) {
+        errs() << "Failed to convert "
+               << StringRef(YamlPath.data(), YamlPath.size()) << " to "
+               << StringRef(DylibPath.data(), DylibPath.size()) << "\n";
+        EnvReady = false;
+        return false;
+      }
+
+      CreatedDylibsDir.push_back(std::string(sys::path::parent_path(
+          StringRef(DylibPath.data(), DylibPath.size()))));
+      CreatedDylibs.push_back(std::string(DylibPath.begin(), DylibPath.end()));
+      return true;
+    };
+
+    std::vector<const char *> LibDirs = {"Z", "A", "B", "C", "D"};
+
+    unsigned DocNum = getYamlDocNum();
+    std::string YamlPltExt = getYamlFilePlatformExt();
+    for (const auto &LibdirName : LibDirs) {
+      // YAML path
+      SmallVector<char, 128> YamlPath(InputDirPath.begin(), InputDirPath.end());
+      SmallVector<char, 128> YamlFileName;
+      YamlFileName.append(LibdirName, LibdirName + strlen(LibdirName));
+      YamlFileName.append(YamlPltExt.begin(), YamlPltExt.end());
+      sys::path::append(YamlPath, LibdirName, YamlFileName);
+      sys::path::replace_extension(YamlPath, ".yaml");
+
+      // dylib path
+      SmallVector<char, 128> DylibPath(DirPath.begin(), DirPath.end());
+      SmallVector<char, 128> DylibFileName;
+      StringRef prefix("lib");
+      DylibFileName.append(prefix.begin(), prefix.end());
+      DylibFileName.append(LibdirName, LibdirName + strlen(LibdirName));
+
+      sys::path::append(DylibPath, LibdirName);
+      if (!sys::fs::exists(DylibPath)) {
+        auto EC = sys::fs::create_directory(DylibPath);
+        if (EC)
+          return;
+      }
+      sys::path::append(DylibPath, DylibFileName);
+      sys::path::replace_extension(DylibPath, ext);
+      if (!processYamlToDylib(YamlPath, DylibPath, DocNum))
+        return;
+    }
+
+    EnvReady = true;
+  }
+
+  void TearDown() override { sys::fs::remove_directories(DirPath); }
+
+  std::string getBaseDir() const {
+    return std::string(DirPath.begin(), DirPath.end());
+  }
+
+  std::vector<std::string> getDylibPaths() const { return CreatedDylibs; }
+};
+
+static LibraryTestEnvironment *GlobalEnv =
+    static_cast<LibraryTestEnvironment *>(
+        ::testing::AddGlobalTestEnvironment(new LibraryTestEnvironment()));
+
+inline std::string libPath(const std::string &BaseDir,
+                           const std::string &name) {
+#if defined(__APPLE__)
+  return BaseDir + "/" + name + ".dylib";
+#elif defined(_WIN32)
+  return BaseDir + "/" + name + ".dll";
+#else
+  return BaseDir + "/" + name + ".so";
+#endif
+}
+
+inline std::string withext(const std::string &lib) {
+  SmallString<128> P(lib);
+  sys::path::replace_extension(P, ext);
+  return P.str().str();
+}
+
+inline std::string platformSymbolName(const std::string &name) {
+#if defined(__APPLE__)
+  return "_" + name; // macOS prepends underscore
+#else
+  return name;
+#endif
+}
+
+struct TestLibrary {
+  std::string path;
+  std::vector<std::string> Syms;
+};
+
+class LibraryResolverIT : public ::testing::Test {
+protected:
+  std::string BaseDir;
+  std::unordered_map<std::string, TestLibrary> libs;
+
+  void addLib(const std::string &name) {
+    SmallString<512> path;
+    sys::fs::real_path(libPath(BaseDir, name + "/lib" + name), path);
+    if (path.empty())
+      EnvReady = false;
+    libs[name] = {path.str().str(), {platformSymbolName("say" + name)}};
+  }
+
+  void SetUp() override {
+    if (!EnvReady)
+      GTEST_SKIP() << "Skipping test: environment setup failed.";
+
+    ASSERT_NE(GlobalEnv, nullptr);
+    BaseDir = GlobalEnv->getBaseDir();
+    for (const auto &P : GlobalEnv->getDylibPaths()) {
+      if (!sys::fs::exists(P))
+        GTEST_SKIP();
+    }
+    const std::vector<std::string> libNames = {"A", "B", "C", "D", "Z"};
+    for (const auto &name : libNames)
+      addLib(name);
+
+    if (!EnvReady)
+      GTEST_SKIP() << "Skipping test: environment setup failed.";
+  }
+
+  const std::vector<std::string> &sym(const std::string &key) {
+    return libs[key].Syms;
+  }
+  const std::string &lib(const std::string &key) { return libs[key].path; }
+  const std::string libdir(const std::string &key) {
+    SmallString<512> P(libs[key].path);
+    sys::path::remove_filename(P);
+    return P.str().str();
+  }
+  const std::string libname(const std::string &key) {
+    return sys::path::filename(libs[key].path).str();
+  }
+};
+
+// Helper: allow either "sayA" or "_sayA" depending on how your SymbolEnumerator
+// reports.
+static bool matchesEitherUnderscore(const std::string &got,
+                                    const std::string &bare) {
+  return got == bare || got == ("_" + bare);
+}
+
+// Helper: normalize path ending check (we only care that it resolved to the
+// right dylib)
+static bool endsWith(const std::string &s, const std::string &suffix) {
+  if (s.size() < suffix.size())
+    return false;
+  return std::equal(suffix.rbegin(), suffix.rend(), s.rbegin());
+}
+
+// --- 1) SymbolEnumerator enumerates real exports from libC.dylib ---
+TEST_F(LibraryResolverIT, EnumerateSymbolsFromARespectsDefaults) {
+  const std::string libC = lib("C");
+
+  SymbolEnumeratorOptions Opts = SymbolEnumeratorOptions::defaultOptions();
+
+  std::vector<std::string> seen;
+  auto onEach = [&](llvm::StringRef sym) -> EnumerateResult {
+    seen.emplace_back(sym.str());
+    return EnumerateResult::Continue;
+  };
+
+  const bool ok = SymbolEnumerator::enumerateSymbols(libC, onEach, Opts);
+  ASSERT_TRUE(ok) << "enumerateSymbols failed on " << libC;
+
+  // We expect to see sayA (export) and not an undefined reference to printf.
+  bool foundSayA = false;
+  for (const auto &s : seen) {
+    if (matchesEitherUnderscore(s, "sayA")) {
+      foundSayA = true;
+      break;
+    }
+  }
+  EXPECT_FALSE(foundSayA) << "Expected exported symbol sayA in libC";
+}
+
+TEST_F(LibraryResolverIT, EnumerateSymbols_ExportsOnly_DefaultFlags) {
+  const std::string libC = lib("C");
+  SymbolEnumeratorOptions Opts = SymbolEnumeratorOptions::defaultOptions();
+
+  std::vector<std::string> seen;
+  auto onEach = [&](llvm::StringRef sym) -> EnumerateResult {
+    seen.emplace_back(sym.str());
+    return EnumerateResult::Continue;
+  };
+
+  ASSERT_TRUE(SymbolEnumerator::enumerateSymbols(libC, onEach, Opts));
+
+  // sayC is exported, others are undefined → only sayC expected
+  EXPECT_TRUE(any_of(seen, [&](const std::string &s) {
+    return matchesEitherUnderscore(s, "sayC");
+  }));
+  EXPECT_FALSE(any_of(seen, [&](const std::string &s) {
+    return matchesEitherUnderscore(s, "sayA");
+  }));
+  EXPECT_FALSE(any_of(seen, [&](const std::string &s) {
+    return matchesEitherUnderscore(s, "sayB");
+  }));
+  EXPECT_FALSE(any_of(seen, [&](const std::string &s) {
+    return matchesEitherUnderscore(s, "sayZ");
+  }));
+}
+
+TEST_F(LibraryResolverIT, EnumerateSymbols_IncludesUndefineds) {
+  const std::string libC = lib("C");
+
+  SymbolEnumeratorOptions Opts;
+  Opts.FilterFlags =
+      SymbolEnumeratorOptions::IgnoreWeak |
+      SymbolEnumeratorOptions::IgnoreIndirect; // no IgnoreUndefined
+
+  std::vector<std::string> seen;
+  auto onEach = [&](llvm::StringRef sym) -> EnumerateResult {
+    seen.emplace_back(sym.str());
+    return EnumerateResult::Continue;
+  };
+
+  ASSERT_TRUE(SymbolEnumerator::enumerateSymbols(libC, onEach, Opts));
+
+  // Now we should see both sayC (export) and the undefined refs sayA, sayB,
+  // sayZ
+  EXPECT_TRUE(any_of(seen, [&](const std::string &s) {
+    return matchesEitherUnderscore(s, "sayC");
+  }));
+  EXPECT_TRUE(any_of(seen, [&](const std::string &s) {
+    return matchesEitherUnderscore(s, "sayA");
+  }));
+  EXPECT_TRUE(any_of(seen, [&](const std::string &s) {
+    return matchesEitherUnderscore(s, "sayB");
+  }));
+  EXPECT_TRUE(any_of(seen, [&](const std::string &s) {
+    return matchesEitherUnderscore(s, "sayZ");
+  }));
+}
+
+TEST_F(LibraryResolverIT, EnumerateSymbols_IndirectExportRespected) {
+  const std::string libD = lib("D");
+
+  SymbolEnumeratorOptions Opts;
+  Opts.FilterFlags = SymbolEnumeratorOptions::IgnoreWeak; // allow indirects
+
+  std::vector<std::string> seen;
+  auto onEach = [&](llvm::StringRef sym) -> EnumerateResult {
+    seen.emplace_back(sym.str());
+    return EnumerateResult::Continue;
+  };
+
+  ASSERT_TRUE(SymbolEnumerator::enumerateSymbols(libD, onEach, Opts));
+
+  // sayA is re-exported from A, so should appear unless IgnoreIndirect was set
+  EXPECT_TRUE(any_of(seen, [&](const std::string &s) {
+    return matchesEitherUnderscore(s, "sayA");
+  }));
+}
+
+// --- 2) Filters: if we remove IgnoreUndefined, we should also see undefineds
+// like printf ---
+TEST_F(LibraryResolverIT, EnumerateSymbolsIncludesUndefWhenNotIgnored) {
+  const std::string libA = lib("A");
+
+  SymbolEnumeratorOptions Opts = SymbolEnumeratorOptions::defaultOptions();
+  // Start from defaults but allow undefined
+  Opts.FilterFlags &= ~SymbolEnumeratorOptions::IgnoreUndefined;
+
+  bool SawPrintf = false;
+  auto onEach = [&](llvm::StringRef sym) -> EnumerateResult {
+    if (matchesEitherUnderscore(sym.str(), "printf") ||
+        matchesEitherUnderscore(sym.str(), "puts"))
+      SawPrintf = true;
+    return EnumerateResult::Continue;
+  };
+
+  ASSERT_TRUE(SymbolEnumerator::enumerateSymbols(libA, onEach, Opts));
+  EXPECT_TRUE(SawPrintf)
+      << "Expected to see undefined symbol printf when not filtered";
+}
+
+// --- 3) Full resolution via LibraryResolutionDriver/LibraryResolver ---
+TEST_F(LibraryResolverIT, DriverResolvesSymbolsToCorrectLibraries) {
+  // Create the resolver from real base paths (our fixtures dir)
+  auto Stup = LibraryResolver::Setup::create({BaseDir});
+
+  // Full system behavior: no mocks
+  auto Driver = LibraryResolutionDriver::create(Stup);
+  ASSERT_NE(Driver, nullptr);
+
+  // Tell the Driver about the scan path kinds (User/System) as your production
+  // code expects.
+  Driver->addScanPath(libdir("A"), PathType::User);
+  Driver->addScanPath(libdir("B"), PathType::User);
+  Driver->addScanPath(libdir("Z"), PathType::User);
+
+  // Symbols to resolve (bare names; class handles underscore differences
+  // internally)
+  std::vector<std::string> Syms = {platformSymbolName("sayA"),
+                                   platformSymbolName("sayB"),
+                                   platformSymbolName("sayZ")};
+
+  bool CallbackRan = false;
+  Driver->resolveSymbols(Syms, [&](SymbolQuery &Q) {
+    CallbackRan = true;
+
+    // sayA should resolve to A.dylib
+    {
+      auto lib = Q.getResolvedLib(platformSymbolName("sayA"));
+      ASSERT_TRUE(lib.has_value()) << "sayA should be resolved";
+      EXPECT_TRUE(endsWith(lib->str(), libname("A")))
+          << "sayA resolved to: " << lib->str();
+    }
+
+    // sayB should resolve to B.dylib
+    {
+      auto lib = Q.getResolvedLib(platformSymbolName("sayB"));
+      ASSERT_TRUE(lib.has_value()) << "sayB should be resolved";
+      EXPECT_TRUE(endsWith(lib->str(), libname("B")))
+          << "sayB resolved to: " << lib->str();
+    }
+
+    // sayZ should resolve to B.dylib
+    {
+      auto lib = Q.getResolvedLib(platformSymbolName("sayZ"));
+      ASSERT_TRUE(lib.has_value()) << "sayZ should be resolved";
+      EXPECT_TRUE(endsWith(lib->str(), libname("Z")))
+          << "sayZ resolved to: " << lib->str();
+    }
+
+    EXPECT_TRUE(Q.allResolved());
+  });
+
+  EXPECT_TRUE(CallbackRan);
+}
+
+// --- 4) Cross-library reference visibility (C references A) ---
+TEST_F(LibraryResolverIT, EnumeratorSeesInterLibraryRelationship) {
+  const std::string libC = lib("C");
+
+  SymbolEnumeratorOptions OnlyUndef = SymbolEnumeratorOptions::defaultOptions();
+  // Show only undefined (drop IgnoreUndefined) to see C's reference to sayA
+  OnlyUndef.FilterFlags &= ~SymbolEnumeratorOptions::IgnoreUndefined;
+
+  bool SawSayAAsUndef = false;
+  auto onEach = [&](llvm::StringRef sym) -> EnumerateResult {
+    if (matchesEitherUnderscore(sym.str(), "sayA"))
+      SawSayAAsUndef = true;
+    return EnumerateResult::Continue;
+  };
+
+  ASSERT_TRUE(SymbolEnumerator::enumerateSymbols(libC, onEach, OnlyUndef));
+  EXPECT_TRUE(SawSayAAsUndef)
+      << "libC should have an undefined reference to sayA (defined in libA)";
+}
+
+// // // --- 5) Optional: stress SymbolQuery with the real resolve flow
+// // // And resolve libC dependency libA, libB, libZ ---
+TEST_F(LibraryResolverIT, ResolveManySymbols) {
+  auto Stup = LibraryResolver::Setup::create({BaseDir});
+  auto Driver = LibraryResolutionDriver::create(Stup);
+  ASSERT_NE(Driver, nullptr);
+  Driver->addScanPath(libdir("C"), PathType::User);
+
+  // Many duplicates to provoke concurrent updates inside SymbolQuery
+  std::vector<std::string> Syms = {
+      platformSymbolName("sayA"), platformSymbolName("sayB"),
+      platformSymbolName("sayA"), platformSymbolName("sayB"),
+      platformSymbolName("sayZ"), platformSymbolName("sayZ"),
+      platformSymbolName("sayZ"), platformSymbolName("sayZ"),
+      platformSymbolName("sayA"), platformSymbolName("sayB"),
+      platformSymbolName("sayA"), platformSymbolName("sayB")};
+
+  bool CallbackRan = false;
+  Driver->resolveSymbols(Syms, [&](SymbolQuery &Q) {
+    CallbackRan = true;
+    EXPECT_TRUE(Q.isResolved(platformSymbolName("sayA")));
+    EXPECT_TRUE(Q.isResolved(platformSymbolName("sayB")));
+    EXPECT_TRUE(Q.isResolved(platformSymbolName("sayZ")));
+
+    auto A = Q.getResolvedLib(platformSymbolName("sayA"));
+    auto B = Q.getResolvedLib(platformSymbolName("sayB"));
+    auto Z = Q.getResolvedLib(platformSymbolName("sayZ"));
+    ASSERT_TRUE(A.has_value());
+    ASSERT_TRUE(B.has_value());
+    ASSERT_TRUE(Z.has_value());
+    EXPECT_TRUE(endsWith(A->str(), libname("A")));
+    EXPECT_TRUE(endsWith(B->str(), libname("B")));
+    EXPECT_TRUE(endsWith(Z->str(), libname("Z")));
+    EXPECT_TRUE(Q.allResolved());
+  });
+
+  EXPECT_TRUE(CallbackRan);
+}
+
+// // // --- 5) Optional: stress SymbolQuery with the real resolve flow
+// // // And resolve libD dependency libA ---
+TEST_F(LibraryResolverIT, ResolveManySymbols2) {
+  auto Stup = LibraryResolver::Setup::create({BaseDir});
+  auto Driver = LibraryResolutionDriver::create(Stup);
+  ASSERT_NE(Driver, nullptr);
+  Driver->addScanPath(libdir("D"), PathType::User);
+
+  // Many duplicates to provoke concurrent updates inside SymbolQuery
+  std::vector<std::string> Syms = {
+      platformSymbolName("sayA"), platformSymbolName("sayB"),
+      platformSymbolName("sayA"), platformSymbolName("sayB"),
+      platformSymbolName("sayZ"), platformSymbolName("sayZ"),
+      platformSymbolName("sayZ"), platformSymbolName("sayZ"),
+      platformSymbolName("sayD"), platformSymbolName("sayD"),
+      platformSymbolName("sayA"), platformSymbolName("sayB"),
+      platformSymbolName("sayA"), platformSymbolName("sayB")};
+
+  Driver->resolveSymbols(Syms, [&](SymbolQuery &Q) {
+    EXPECT_TRUE(Q.isResolved(platformSymbolName("sayA")));
+    EXPECT_TRUE(Q.isResolved(platformSymbolName("sayD")));
+
+    auto A = Q.getResolvedLib(platformSymbolName("sayA"));
+    auto D = Q.getResolvedLib(platformSymbolName("sayD"));
+    ASSERT_TRUE(A.has_value());
+    ASSERT_TRUE(D.has_value());
+    EXPECT_TRUE(endsWith(A->str(), libname("A")));
+    EXPECT_TRUE(endsWith(D->str(), libname("D")));
+    EXPECT_FALSE(Q.allResolved());
+  });
+}
+
+TEST_F(LibraryResolverIT, ScanSingleUserPath) {
+  auto LibPathCache = std::make_shared<LibraryPathCache>();
+  auto PResolver = std::make_shared<PathResolver>(LibPathCache);
+  LibraryScanHelper ScanH({}, LibPathCache, PResolver);
+
+  ScanH.addBasePath(libdir("C"), PathType::User);
+
+  std::error_code EC;
+  auto libCPathOpt = PResolver->resolve(lib("C"), EC);
+
+  if (!libCPathOpt || EC) {
+    FAIL();
+  }
+
+  std::string libCPath = *libCPathOpt;
+
+  LibraryManager LibMgr;
+  LibraryScanner Scanner(ScanH, LibMgr);
+
+  Scanner.scanNext(PathType::User, 0);
+
+  bool found = false;
+  LibMgr.forEachLibrary([&](const LibraryInfo &lib) {
+    if (lib.getFullPath() == libCPath) {
+      found = true;
+    }
+    return true;
+  });
+  EXPECT_TRUE(found) << "Expected to find " << libCPath;
+}
+
+TEST_F(LibraryResolverIT, ScanAndCheckDeps) {
+  auto LibPathCache = std::make_shared<LibraryPathCache>();
+  auto PResolver = std::make_shared<PathResolver>(LibPathCache);
+  LibraryScanHelper ScanH({}, LibPathCache, PResolver);
+
+  ScanH.addBasePath(libdir("C"), PathType::User);
+
+  LibraryManager LibMgr;
+  LibraryScanner Scanner(ScanH, LibMgr);
+
+  Scanner.scanNext(PathType::User, 0);
+
+  size_t count = 0;
+  LibMgr.forEachLibrary([&](const LibraryInfo &) {
+    count++;
+    return true;
+  });
+
+  EXPECT_GE(count, 3u) << "Should find at least libA in multiple paths";
+}
+
+TEST_F(LibraryResolverIT, ScanEmptyPath) {
+  auto LibPathCache = std::make_shared<LibraryPathCache>();
+  auto PResolver = std::make_shared<PathResolver>(LibPathCache);
+  LibraryScanHelper ScanH({}, LibPathCache, PResolver);
+
+  ScanH.addBasePath("/tmp/empty", PathType::User);
+
+  LibraryManager LibMgr;
+  LibraryScanner Scanner(ScanH, LibMgr);
+
+  Scanner.scanNext(PathType::User, 0);
+
+  size_t count = 0;
+  LibMgr.forEachLibrary([&](const LibraryInfo &) {
+    count++;
+    return true;
+  });
+  EXPECT_EQ(count, 0u);
+}
+
+TEST_F(LibraryResolverIT, PathResolverResolvesKnownPaths) {
+  auto LibPathCache = std::make_shared<LibraryPathCache>();
+  auto PResolver = std::make_shared<PathResolver>(LibPathCache);
+
+  std::error_code EC;
+  auto Missing = PResolver->resolve("temp/foo/bar", EC);
+  EXPECT_FALSE(Missing.has_value()) << "Unexpectedly resolved a bogus path";
+  EXPECT_TRUE(EC) << "Expected error resolving path";
+
+  auto DirPath = PResolver->resolve(BaseDir, EC);
+  ASSERT_TRUE(DirPath.has_value());
+  EXPECT_FALSE(EC) << "Expected no error resolving path";
+  EXPECT_EQ(*DirPath, BaseDir);
+
+  auto DylibPath = PResolver->resolve(lib("C"), EC);
+  ASSERT_TRUE(DylibPath.has_value());
+  EXPECT_FALSE(EC) << "Expected no error resolving path";
+  EXPECT_EQ(*DylibPath, lib("C"));
+}
+
+TEST_F(LibraryResolverIT, PathResolverNormalizesDotAndDotDot) {
+  auto LibPathCache = std::make_shared<LibraryPathCache>();
+  auto PResolver = std::make_shared<PathResolver>(LibPathCache);
+
+  std::error_code EC;
+
+  // e.g. BaseDir + "/./C/../C/C.dylib" → BaseDir + "/C.dylib"
+  std::string Messy = BaseDir + "/C/./../C/./libC" + ext;
+  auto Resolved = PResolver->resolve(Messy, EC);
+  ASSERT_TRUE(Resolved.has_value());
+  EXPECT_FALSE(EC);
+  EXPECT_EQ(*Resolved, lib("C")) << "Expected realpath to collapse . and ..";
+}
+
+#if !defined(_WIN32)
+TEST_F(LibraryResolverIT, PathResolverFollowsSymlinks) {
+  auto LibPathCache = std::make_shared<LibraryPathCache>();
+  auto PResolver = std::make_shared<PathResolver>(LibPathCache);
+
+  std::error_code EC;
+
+  // Create a symlink temp -> BaseDir (only if filesystem allows it)
+  std::string linkName = BaseDir + withext("/link_to_C");
+  std::string target = lib("C");
+  ::symlink(target.c_str(), linkName.c_str());
+
+  auto resolved = PResolver->resolve(linkName, EC);
+  ASSERT_TRUE(resolved.has_value());
+  EXPECT_FALSE(EC);
+  EXPECT_EQ(*resolved, target);
+
+  ::unlink(linkName.c_str()); // cleanup
+}
+
+TEST_F(LibraryResolverIT, PathResolverCachesResults) {
+  auto LibPathCache = std::make_shared<LibraryPathCache>();
+  auto PResolver = std::make_shared<PathResolver>(LibPathCache);
+
+  SmallString<128> TmpDylib;
+  sys::fs::createUniqueFile(withext("A-copy"), TmpDylib);
+  sys::fs::copy_file(lib("A"), TmpDylib);
+
+  std::error_code EC;
+
+  // First resolve -> should populate LibPathCache
+  auto first = PResolver->resolve(TmpDylib, EC);
+  ASSERT_TRUE(first.has_value());
+
+  // Forcefully remove the file from disk
+  ::unlink(TmpDylib.c_str());
+
+  // Second resolve -> should still succeed from LibPathCache
+  auto second = PResolver->resolve(TmpDylib, EC);
+  EXPECT_TRUE(second.has_value());
+  EXPECT_EQ(*second, *first);
+}
+#endif
+
+TEST_F(LibraryResolverIT, LoaderPathSubstitutionAndResolve) {
+  auto LibPathCache = std::make_shared<LibraryPathCache>();
+  auto PResolver = std::make_shared<PathResolver>(LibPathCache);
+
+  DylibSubstitutor substitutor;
+  substitutor.configure(libdir("C"));
+#if defined(__APPLE__)
+  // Substitute @loader_path with BaseDir
+  std::string substituted =
+      substitutor.substitute(withext("@loader_path/libC"));
+#elif defined(__linux__)
+  // Substitute $origin with BaseDir
+  std::string substituted = substitutor.substitute(withext("$ORIGIN/libC"));
+#endif
+  ASSERT_FALSE(substituted.empty());
+  EXPECT_EQ(substituted, lib("C"));
+
+  // Now try resolving the substituted path
+  std::error_code EC;
+  auto resolved = PResolver->resolve(substituted, EC);
+  ASSERT_TRUE(resolved.has_value()) << "Expected to resolve substituted dylib";
+  EXPECT_EQ(*resolved, lib("C"));
+  EXPECT_FALSE(EC) << "Expected no error resolving substituted dylib";
+}
+
+TEST_F(LibraryResolverIT, ResolveFromUsrOrSystemPaths) {
+  auto LibPathCache = std::make_shared<LibraryPathCache>();
+  auto PResolver = std::make_shared<PathResolver>(LibPathCache);
+
+  DylibPathValidator validator(*PResolver);
+
+  std::vector<std::string> Paths = {"/foo/bar/", "temp/foo",  libdir("C"),
+                                    libdir("A"), libdir("B"), libdir("Z")};
+
+  SmallVector<StringRef> P(Paths.begin(), Paths.end());
+
+  DylibResolver Resolver(validator);
+  Resolver.configure("", {{P, SearchPathType::UsrOrSys}});
+
+  // Check "C"
+  auto ValOptC = Resolver.resolve("libC", true);
+  EXPECT_TRUE(ValOptC.has_value());
+  EXPECT_EQ(*ValOptC, lib("C"));
+
+  auto ValOptCdylib = Resolver.resolve(withext("libC"));
+  EXPECT_TRUE(ValOptCdylib.has_value());
+  EXPECT_EQ(*ValOptCdylib, lib("C"));
+
+  // Check "A"
+  auto ValOptA = Resolver.resolve("libA", true);
+  EXPECT_TRUE(ValOptA.has_value());
+  EXPECT_EQ(*ValOptA, lib("A"));
+
+  auto ValOptAdylib = Resolver.resolve(withext("libA"));
+  EXPECT_TRUE(ValOptAdylib.has_value());
+  EXPECT_EQ(*ValOptAdylib, lib("A"));
+
+  // Check "B"
+  auto ValOptB = Resolver.resolve("libB", true);
+  EXPECT_TRUE(ValOptB.has_value());
+  EXPECT_EQ(*ValOptB, lib("B"));
+
+  auto ValOptBdylib = Resolver.resolve(withext("libB"));
+  EXPECT_TRUE(ValOptBdylib.has_value());
+  EXPECT_EQ(*ValOptBdylib, lib("B"));
+
+  // Check "Z"
+  auto ValOptZ = Resolver.resolve("libZ", true);
+  EXPECT_TRUE(ValOptZ.has_value());
+  EXPECT_EQ(*ValOptZ, lib("Z"));
+
+  auto ValOptZdylib = Resolver.resolve(withext("libZ"));
+  EXPECT_TRUE(ValOptZdylib.has_value());
+  EXPECT_EQ(*ValOptZdylib, lib("Z"));
+}
+
+#if defined(__APPLE__)
+TEST_F(LibraryResolverIT, ResolveViaLoaderPathAndRPathSubstitution) {
+  auto LibPathCache = std::make_shared<LibraryPathCache>();
+  auto PResolver = std::make_shared<PathResolver>(LibPathCache);
+
+  DylibPathValidator validator(*PResolver);
+
+  std::vector<std::string> Paths = {"@loader_path/../A", "@loader_path/../B",
+                                    "@loader_path/../D", "@loader_path/../Z"};
+
+  SmallVector<StringRef> P(Paths.begin(), Paths.end());
+
+  DylibResolver Resolver(validator);
+
+  // Use only RPath config
+  Resolver.configure(lib("C"), {{P, SearchPathType::RPath}});
+
+  // --- Check A ---
+  auto ValOptA = Resolver.resolve("@rpath/libA", true);
+  EXPECT_TRUE(ValOptA.has_value());
+  EXPECT_EQ(*ValOptA, lib("A"));
+
+  auto ValOptAdylib = Resolver.resolve(withext("@rpath/libA"));
+  EXPECT_TRUE(ValOptAdylib.has_value());
+  EXPECT_EQ(*ValOptAdylib, lib("A"));
+
+  // --- Check B ---
+  auto ValOptB = Resolver.resolve("@rpath/libB", true);
+  EXPECT_TRUE(ValOptB.has_value());
+  EXPECT_EQ(*ValOptB, lib("B"));
+
+  auto ValOptBdylib = Resolver.resolve(withext("@rpath/libB"));
+  EXPECT_TRUE(ValOptBdylib.has_value());
+  EXPECT_EQ(*ValOptBdylib, lib("B"));
+
+  // --- Check Z ---
+  auto ValOptZ = Resolver.resolve("@rpath/libZ", true);
+  EXPECT_TRUE(ValOptZ.has_value());
+  EXPECT_EQ(*ValOptZ, lib("Z"));
+
+  auto ValOptZdylib = Resolver.resolve(withext("@rpath/libZ"));
+  EXPECT_TRUE(ValOptZdylib.has_value());
+  EXPECT_EQ(*ValOptZdylib, lib("Z"));
+}
+#endif
+
+#if defined(__linux__)
+TEST_F(LibraryResolverIT, ResolveViaOriginAndRPathSubstitution) {
+  auto LibPathCache = std::make_shared<LibraryPathCache>();
+  auto PResolver = std::make_shared<PathResolver>(LibPathCache);
+
+  DylibPathValidator validator(*PResolver);
+
+  // On Linux, $ORIGIN works like @loader_path
+  std::vector<std::string> Paths = {"$ORIGIN/../A", "$ORIGIN/../B",
+                                    "$ORIGIN/../D", "$ORIGIN/../Z"};
+
+  SmallVector<StringRef> P(Paths.begin(), Paths.end());
+
+  DylibResolver Resolver(validator);
+
+  // Use only RPath config
+  Resolver.configure(lib("C"), {{P, SearchPathType::RunPath}});
+
+  // --- Check A ---
+  auto ValOptA = Resolver.resolve("libA", true);
+  EXPECT_TRUE(ValOptA.has_value());
+  EXPECT_EQ(*ValOptA, lib("A"));
+
+  auto valOptASO = Resolver.resolve(withext("libA"));
+  EXPECT_TRUE(valOptASO.has_value());
+  EXPECT_EQ(*valOptASO, lib("A"));
+
+  // --- Check B ---
+  auto ValOptB = Resolver.resolve("libB", true);
+  EXPECT_TRUE(ValOptB.has_value());
+  EXPECT_EQ(*ValOptB, lib("B"));
+
+  auto valOptBSO = Resolver.resolve(withext("libB"));
+  EXPECT_TRUE(valOptBSO.has_value());
+  EXPECT_EQ(*valOptBSO, lib("B"));
+
+  // --- Check Z ---
+  auto ValOptZ = Resolver.resolve("libZ", true);
+  EXPECT_TRUE(ValOptZ.has_value());
+  EXPECT_EQ(*ValOptZ, lib("Z"));
+
+  auto valOptZSO = Resolver.resolve(withext("libZ"));
+  EXPECT_TRUE(valOptZSO.has_value());
+  EXPECT_EQ(*valOptZSO, lib("Z"));
+}
+#endif
+} // namespace
+#endif // defined(__APPLE__)
diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index e747006..278c29c 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -911,7 +911,6 @@ if (current_toolchain == default_toolchain) {
       "__cxx03/cmath",
       "__cxx03/codecvt",
       "__cxx03/complex",
-      "__cxx03/complex.h",
       "__cxx03/condition_variable",
       "__cxx03/csetjmp",
       "__cxx03/csignal",
@@ -924,25 +923,20 @@ if (current_toolchain == default_toolchain) {
       "__cxx03/cstring",
       "__cxx03/ctgmath",
       "__cxx03/ctime",
-      "__cxx03/ctype.h",
       "__cxx03/cuchar",
       "__cxx03/cwchar",
       "__cxx03/cwctype",
       "__cxx03/deque",
-      "__cxx03/errno.h",
       "__cxx03/exception",
       "__cxx03/experimental/__config",
       "__cxx03/experimental/utility",
       "__cxx03/ext/__hash",
       "__cxx03/ext/hash_map",
       "__cxx03/ext/hash_set",
-      "__cxx03/fenv.h",
-      "__cxx03/float.h",
       "__cxx03/forward_list",
       "__cxx03/fstream",
       "__cxx03/functional",
       "__cxx03/future",
-      "__cxx03/inttypes.h",
       "__cxx03/iomanip",
       "__cxx03/ios",
       "__cxx03/iosfwd",
@@ -969,11 +963,8 @@ if (current_toolchain == default_toolchain) {
       "__cxx03/sstream",
       "__cxx03/stack",
       "__cxx03/stdatomic.h",
-      "__cxx03/stdbool.h",
-      "__cxx03/stddef.h",
       "__cxx03/stdexcept",
       "__cxx03/stdint.h",
-      "__cxx03/stdio.h",
       "__cxx03/stdlib.h",
       "__cxx03/streambuf",
       "__cxx03/string",
@@ -981,7 +972,6 @@ if (current_toolchain == default_toolchain) {
       "__cxx03/string_view",
       "__cxx03/strstream",
       "__cxx03/system_error",
-      "__cxx03/tgmath.h",
       "__cxx03/thread",
       "__cxx03/type_traits",
       "__cxx03/typeindex",
@@ -994,7 +984,6 @@ if (current_toolchain == default_toolchain) {
       "__cxx03/vector",
       "__cxx03/version",
       "__cxx03/wchar.h",
-      "__cxx03/wctype.h",
       "__debug_utils/randomize_range.h",
       "__debug_utils/sanitizers.h",
       "__debug_utils/strict_weak_ordering_check.h",
diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn
index c4ce990..937e81b 100644
--- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn
@@ -12,6 +12,8 @@ static_library("TargetProcess") {
     "JITLoaderGDB.cpp",
     "JITLoaderPerf.cpp",
     "JITLoaderVTune.cpp",
+    "LibraryResolver.cpp",
+    "LibraryScanner.cpp",
     "OrcRTBootstrap.cpp",
     "RegisterEHFrames.cpp",
     "SimpleExecutorDylibManager.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn
index dfe6d6d..111e4c9 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn
@@ -24,6 +24,7 @@ unittest("OrcJITTests") {
     "JITLinkRedirectionManagerTest.cpp",
     "JITTargetMachineBuilderTest.cpp",
     "LazyCallThroughAndReexportsTest.cpp",
+    "LibraryResolverTest.cpp",
     "LookupAndRecordAddrsTest.cpp",
     "MachOPlatformTest.cpp",
     "MapperJITLinkMemoryManagerTest.cpp",
diff --git a/llvm/utils/lldbDataFormatters.py b/llvm/utils/lldbDataFormatters.py
index 5e553ca..a3e4ae1 100644
--- a/llvm/utils/lldbDataFormatters.py
+++ b/llvm/utils/lldbDataFormatters.py
@@ -197,6 +197,11 @@ def StringRefSummaryProvider(valobj, internal_dict):
         return '""'
 
     data = data_pointer.deref
+    # StringRef may be uninitialized with length exceeding available memory,
+    # potentially causing bad_alloc exceptions. Limit the length to max string summary setting.
+    limit_obj = valobj.target.debugger.GetSetting("target.max-string-summary-length")
+    if limit_obj:
+        length = min(length, limit_obj.GetUnsignedIntegerValue())
     # Get a char[N] type, from the underlying char type.
     array_type = data.type.GetArrayType(length)
     # Cast the char* string data to a char[N] array.