diff options
Diffstat (limited to 'llvm/lib')
104 files changed, 2005 insertions, 531 deletions
diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp index 571c5b3..003c850 100644 --- a/llvm/lib/CAS/ActionCaches.cpp +++ b/llvm/lib/CAS/ActionCaches.cpp @@ -13,7 +13,11 @@  #include "BuiltinCAS.h"  #include "llvm/ADT/TrieRawHashMap.h"  #include "llvm/CAS/ActionCache.h" +#include "llvm/CAS/OnDiskKeyValueDB.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/Support/BLAKE3.h" +#include "llvm/Support/Errc.h"  #define DEBUG_TYPE "cas-action-caches" @@ -47,12 +51,54 @@ public:    Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,                                           bool CanBeDistributed) const final; +  Error validate() const final { +    return createStringError("InMemoryActionCache doesn't support validate()"); +  } +  private:    using DataT = CacheEntry<sizeof(HashType)>;    using InMemoryCacheT = ThreadSafeTrieRawHashMap<DataT, sizeof(HashType)>;    InMemoryCacheT Cache;  }; + +/// Builtin basic OnDiskActionCache that uses one underlying OnDiskKeyValueDB. +class OnDiskActionCache final : public ActionCache { +public: +  Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result, +                bool CanBeDistributed) final; +  Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey, +                                         bool CanBeDistributed) const final; + +  static Expected<std::unique_ptr<OnDiskActionCache>> create(StringRef Path); + +  Error validate() const final; + +private: +  static StringRef getHashName() { return "BLAKE3"; } + +  OnDiskActionCache(std::unique_ptr<ondisk::OnDiskKeyValueDB> DB); + +  std::unique_ptr<ondisk::OnDiskKeyValueDB> DB; +  using DataT = CacheEntry<sizeof(HashType)>; +}; + +/// Builtin unified ActionCache that wraps around UnifiedOnDiskCache to provide +/// access to its ActionCache. 
+class UnifiedOnDiskActionCache final : public ActionCache { +public: +  Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result, +                bool CanBeDistributed) final; +  Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey, +                                         bool CanBeDistributed) const final; + +  UnifiedOnDiskActionCache(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB); + +  Error validate() const final; + +private: +  std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB; +};  } // end namespace  static Error createResultCachePoisonedError(ArrayRef<uint8_t> KeyHash, @@ -99,3 +145,123 @@ std::unique_ptr<ActionCache> createInMemoryActionCache() {  }  } // namespace llvm::cas + +OnDiskActionCache::OnDiskActionCache( +    std::unique_ptr<ondisk::OnDiskKeyValueDB> DB) +    : ActionCache(builtin::BuiltinCASContext::getDefaultContext()), +      DB(std::move(DB)) {} + +Expected<std::unique_ptr<OnDiskActionCache>> +OnDiskActionCache::create(StringRef AbsPath) { +  std::unique_ptr<ondisk::OnDiskKeyValueDB> DB; +  if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(), +                                               sizeof(HashType), getHashName(), +                                               sizeof(DataT)) +                    .moveInto(DB)) +    return std::move(E); +  return std::unique_ptr<OnDiskActionCache>( +      new OnDiskActionCache(std::move(DB))); +} + +Expected<std::optional<CASID>> +OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key, +                           bool /*CanBeDistributed*/) const { +  std::optional<ArrayRef<char>> Val; +  if (Error E = DB->get(Key).moveInto(Val)) +    return std::move(E); +  if (!Val) +    return std::nullopt; +  return CASID::create(&getContext(), toStringRef(*Val)); +} + +Error OnDiskActionCache::putImpl(ArrayRef<uint8_t> Key, const CASID &Result, +                                 bool /*CanBeDistributed*/) { +  auto ResultHash = Result.getHash(); +  ArrayRef Expected((const 
char *)ResultHash.data(), ResultHash.size()); +  ArrayRef<char> Observed; +  if (Error E = DB->put(Key, Expected).moveInto(Observed)) +    return E; + +  if (Expected == Observed) +    return Error::success(); + +  return createResultCachePoisonedError( +      Key, getContext(), Result, +      ArrayRef((const uint8_t *)Observed.data(), Observed.size())); +} + +Error OnDiskActionCache::validate() const { +  // FIXME: without the matching CAS there is nothing we can check about the +  // cached values. The hash size is already validated by the DB validator. +  return DB->validate(nullptr); +} + +UnifiedOnDiskActionCache::UnifiedOnDiskActionCache( +    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) +    : ActionCache(builtin::BuiltinCASContext::getDefaultContext()), +      UniDB(std::move(UniDB)) {} + +Expected<std::optional<CASID>> +UnifiedOnDiskActionCache::getImpl(ArrayRef<uint8_t> Key, +                                  bool /*CanBeDistributed*/) const { +  std::optional<ArrayRef<char>> Val; +  if (Error E = UniDB->getKeyValueDB().get(Key).moveInto(Val)) +    return std::move(E); +  if (!Val) +    return std::nullopt; +  auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Val); +  return CASID::create(&getContext(), +                       toStringRef(UniDB->getGraphDB().getDigest(ID))); +} + +Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key, +                                        const CASID &Result, +                                        bool /*CanBeDistributed*/) { +  auto Expected = UniDB->getGraphDB().getReference(Result.getHash()); +  if (LLVM_UNLIKELY(!Expected)) +    return Expected.takeError(); + +  auto Value = ondisk::UnifiedOnDiskCache::getValueFromObjectID(*Expected); +  std::optional<ArrayRef<char>> Observed; +  if (Error E = UniDB->getKeyValueDB().put(Key, Value).moveInto(Observed)) +    return E; + +  auto ObservedID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Observed); +  if (*Expected == ObservedID) +    
return Error::success(); + +  return createResultCachePoisonedError( +      Key, getContext(), Result, UniDB->getGraphDB().getDigest(ObservedID)); +} + +Error UnifiedOnDiskActionCache::validate() const { +  auto ValidateRef = [](FileOffset Offset, ArrayRef<char> Value) -> Error { +    auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value); +    auto formatError = [&](Twine Msg) { +      return createStringError( +          llvm::errc::illegal_byte_sequence, +          "bad record at 0x" + +              utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + +              Msg.str()); +    }; +    if (ID.getOpaqueData() == 0) +      return formatError("zero is not a valid ref"); +    return Error::success(); +  }; +  return UniDB->getKeyValueDB().validate(ValidateRef); +} + +Expected<std::unique_ptr<ActionCache>> +cas::createOnDiskActionCache(StringRef Path) { +#if LLVM_ENABLE_ONDISK_CAS +  return OnDiskActionCache::create(Path); +#else +  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled"); +#endif +} + +std::unique_ptr<ActionCache> +cas::builtin::createActionCacheFromUnifiedOnDiskCache( +    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) { +  return std::make_unique<UnifiedOnDiskActionCache>(std::move(UniDB)); +} diff --git a/llvm/lib/CAS/BuiltinCAS.cpp b/llvm/lib/CAS/BuiltinCAS.cpp index 73646ad..e9bc6d8 100644 --- a/llvm/lib/CAS/BuiltinCAS.cpp +++ b/llvm/lib/CAS/BuiltinCAS.cpp @@ -9,6 +9,7 @@  #include "BuiltinCAS.h"  #include "llvm/ADT/StringExtras.h"  #include "llvm/CAS/BuiltinObjectHasher.h" +#include "llvm/CAS/UnifiedOnDiskCache.h"  #include "llvm/Support/Process.h"  using namespace llvm; @@ -68,7 +69,7 @@ Expected<ObjectRef> BuiltinCAS::store(ArrayRef<ObjectRef> Refs,                     Refs, Data);  } -Error BuiltinCAS::validate(const CASID &ID) { +Error BuiltinCAS::validateObject(const CASID &ID) {    auto Ref = getReference(ID);    if (!Ref)      return createUnknownObjectError(ID); @@ -92,3 +93,14 @@ 
Error BuiltinCAS::validate(const CASID &ID) {    return Error::success();  } + +Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>> +cas::builtin::createBuiltinUnifiedOnDiskCache(StringRef Path) { +#if LLVM_ENABLE_ONDISK_CAS +  return ondisk::UnifiedOnDiskCache::open(Path, /*SizeLimit=*/std::nullopt, +                                          BuiltinCASContext::getHashName(), +                                          sizeof(HashType)); +#else +  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled"); +#endif +} diff --git a/llvm/lib/CAS/BuiltinCAS.h b/llvm/lib/CAS/BuiltinCAS.h index 3b5374d..4d2de66 100644 --- a/llvm/lib/CAS/BuiltinCAS.h +++ b/llvm/lib/CAS/BuiltinCAS.h @@ -1,4 +1,4 @@ -//===- BuiltinCAS.h ---------------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===//  //  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.  // See https://llvm.org/LICENSE.txt for license information. @@ -15,6 +15,9 @@  namespace llvm::cas {  class ActionCache; +namespace ondisk { +class UnifiedOnDiskCache; +} // namespace ondisk  namespace builtin {  /// Common base class for builtin CAS implementations using the same CASContext. @@ -65,9 +68,27 @@ public:                               "corrupt storage");    } -  Error validate(const CASID &ID) final; +  Error validateObject(const CASID &ID) final;  }; +/// Create a \p UnifiedOnDiskCache instance that uses \p BLAKE3 hashing. +Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>> +createBuiltinUnifiedOnDiskCache(StringRef Path); + +/// \param UniDB A \p UnifiedOnDiskCache instance from \p +/// createBuiltinUnifiedOnDiskCache. +std::unique_ptr<ObjectStore> createObjectStoreFromUnifiedOnDiskCache( +    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB); + +/// \param UniDB A \p UnifiedOnDiskCache instance from \p +/// createBuiltinUnifiedOnDiskCache. 
+std::unique_ptr<ActionCache> createActionCacheFromUnifiedOnDiskCache( +    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB); + +// FIXME: Proxy not portable. Maybe also error-prone? +constexpr StringLiteral DefaultDirProxy = "/^llvm::cas::builtin::default"; +constexpr StringLiteral DefaultDir = "llvm.cas.builtin.default"; +  } // end namespace builtin  } // end namespace llvm::cas diff --git a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp new file mode 100644 index 0000000..f3f6fa0 --- /dev/null +++ b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/BuiltinUnifiedCASDatabases.h" +#include "BuiltinCAS.h" +#include "llvm/CAS/ActionCache.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" + +using namespace llvm; +using namespace llvm::cas; + +Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>> +cas::createOnDiskUnifiedCASDatabases(StringRef Path) { +  std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB; +  if (Error E = builtin::createBuiltinUnifiedOnDiskCache(Path).moveInto(UniDB)) +    return std::move(E); +  auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB); +  auto AC = builtin::createActionCacheFromUnifiedOnDiskCache(std::move(UniDB)); +  return std::make_pair(std::move(CAS), std::move(AC)); +} + +Expected<ValidationResult> cas::validateOnDiskUnifiedCASDatabasesIfNeeded( +    StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation, +    std::optional<StringRef> LLVMCasBinary) { +#if LLVM_ENABLE_ONDISK_CAS +  return ondisk::UnifiedOnDiskCache::validateIfNeeded( + 
     Path, builtin::BuiltinCASContext::getHashName(), +      sizeof(builtin::HashType), CheckHash, AllowRecovery, ForceValidation, +      LLVMCasBinary); +#else +  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled"); +#endif +} diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt index a2f8c49..aad77dc 100644 --- a/llvm/lib/CAS/CMakeLists.txt +++ b/llvm/lib/CAS/CMakeLists.txt @@ -2,15 +2,18 @@ add_llvm_component_library(LLVMCAS    ActionCache.cpp    ActionCaches.cpp    BuiltinCAS.cpp +  BuiltinUnifiedCASDatabases.cpp    DatabaseFile.cpp    InMemoryCAS.cpp    MappedFileRegionArena.cpp    ObjectStore.cpp +  OnDiskCAS.cpp    OnDiskCommon.cpp    OnDiskDataAllocator.cpp    OnDiskGraphDB.cpp    OnDiskKeyValueDB.cpp    OnDiskTrieRawHashMap.cpp +  UnifiedOnDiskCache.cpp    ADDITIONAL_HEADER_DIRS    ${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS diff --git a/llvm/lib/CAS/InMemoryCAS.cpp b/llvm/lib/CAS/InMemoryCAS.cpp index c63ee70d..2d4eedd 100644 --- a/llvm/lib/CAS/InMemoryCAS.cpp +++ b/llvm/lib/CAS/InMemoryCAS.cpp @@ -233,6 +233,12 @@ public:      return cast<InMemoryObject>(asInMemoryObject(Node)).getData();    } +  void print(raw_ostream &OS) const final; + +  Error validate(bool CheckHash) const final { +    return createStringError("InMemoryCAS doesn't support validate()"); +  } +    InMemoryCAS() = default;  private: @@ -271,6 +277,8 @@ ArrayRef<const InMemoryObject *> InMemoryObject::getRefs() const {    return cast<InMemoryInlineObject>(this)->getRefsImpl();  } +void InMemoryCAS::print(raw_ostream &OS) const {} +  Expected<ObjectRef>  InMemoryCAS::storeFromNullTerminatedRegion(ArrayRef<uint8_t> ComputedHash,                                             sys::fs::mapped_file_region Map) { diff --git a/llvm/lib/CAS/ObjectStore.cpp b/llvm/lib/CAS/ObjectStore.cpp index e0be50b..3110577 100644 --- a/llvm/lib/CAS/ObjectStore.cpp +++ b/llvm/lib/CAS/ObjectStore.cpp @@ -1,4 +1,4 @@ -//===- ObjectStore.cpp 
------------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===//  //  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.  // See https://llvm.org/LICENSE.txt for license information. @@ -12,7 +12,7 @@  #include "llvm/Support/Errc.h"  #include "llvm/Support/FileSystem.h"  #include "llvm/Support/MemoryBuffer.h" -#include <optional> +#include <deque>  using namespace llvm;  using namespace llvm::cas; @@ -21,6 +21,7 @@ void CASContext::anchor() {}  void ObjectStore::anchor() {}  LLVM_DUMP_METHOD void CASID::dump() const { print(dbgs()); } +LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); }  LLVM_DUMP_METHOD void ObjectRef::dump() const { print(dbgs()); }  LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(dbgs()); } @@ -141,7 +142,7 @@ Error ObjectStore::validateTree(ObjectRef Root) {      auto [I, Inserted] = ValidatedRefs.insert(Ref);      if (!Inserted)        continue; // already validated. -    if (Error E = validate(getID(Ref))) +    if (Error E = validateObject(getID(Ref)))        return E;      Expected<ObjectHandle> Obj = load(Ref);      if (!Obj) @@ -155,6 +156,92 @@ Error ObjectStore::validateTree(ObjectRef Root) {    return Error::success();  } +Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream, +                                              ObjectRef Other) { +  // Copy the full CAS tree from upstream with depth-first ordering to ensure +  // all the child nodes are available in downstream CAS before inserting +  // current object. This uses a similar algorithm as +  // `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS schema +  // so it can be used to import from any other ObjectStore regardless of the +  // CAS schema. + +  // There is no work to do if importing from self. 
+  if (this == &Upstream) +    return Other; + +  /// Keeps track of the state of visitation for current node and all of its +  /// parents. Upstream Cursor holds information only from upstream CAS. +  struct UpstreamCursor { +    ObjectRef Ref; +    ObjectHandle Node; +    size_t RefsCount; +    std::deque<ObjectRef> Refs; +  }; +  SmallVector<UpstreamCursor, 16> CursorStack; +  /// PrimaryRefStack holds the ObjectRefs in the current CAS, for nodes either +  /// just stored in the CAS or already existing in the current CAS. +  SmallVector<ObjectRef, 128> PrimaryRefStack; +  /// A map from upstream ObjectRef to current ObjectRef. +  llvm::DenseMap<ObjectRef, ObjectRef> CreatedObjects; + +  auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) { +    unsigned NumRefs = Upstream.getNumRefs(Node); +    std::deque<ObjectRef> Refs; +    for (unsigned I = 0; I < NumRefs; ++I) +      Refs.push_back(Upstream.readRef(Node, I)); + +    CursorStack.push_back({Ref, Node, NumRefs, std::move(Refs)}); +  }; + +  auto UpstreamHandle = Upstream.load(Other); +  if (!UpstreamHandle) +    return UpstreamHandle.takeError(); +  enqueueNode(Other, *UpstreamHandle); + +  while (!CursorStack.empty()) { +    UpstreamCursor &Cur = CursorStack.back(); +    if (Cur.Refs.empty()) { +      // Copy the node data into the primary store. +      // The last \p Cur.RefsCount entries of \p PrimaryRefStack are the +      // ObjectRefs of the current node's references. +      assert(PrimaryRefStack.size() >= Cur.RefsCount); +      auto Refs = ArrayRef(PrimaryRefStack) +                      .slice(PrimaryRefStack.size() - Cur.RefsCount); +      auto NewNode = store(Refs, Upstream.getData(Cur.Node)); +      if (!NewNode) +        return NewNode.takeError(); + +      // Remove the current node and its IDs from the stack. 
+      PrimaryRefStack.truncate(PrimaryRefStack.size() - Cur.RefsCount); +      CursorStack.pop_back(); + +      PrimaryRefStack.push_back(*NewNode); +      CreatedObjects.try_emplace(Cur.Ref, *NewNode); +      continue; +    } + +    // Check if the node exists already. +    auto CurrentID = Cur.Refs.front(); +    Cur.Refs.pop_front(); +    auto Ref = CreatedObjects.find(CurrentID); +    if (Ref != CreatedObjects.end()) { +      // If exists already, just need to enqueue the primary node. +      PrimaryRefStack.push_back(Ref->second); +      continue; +    } + +    // Load child. +    auto PrimaryID = Upstream.load(CurrentID); +    if (LLVM_UNLIKELY(!PrimaryID)) +      return PrimaryID.takeError(); + +    enqueueNode(CurrentID, *PrimaryID); +  } + +  assert(PrimaryRefStack.size() == 1); +  return PrimaryRefStack.front(); +} +  std::unique_ptr<MemoryBuffer>  ObjectProxy::getMemoryBuffer(StringRef Name,                               bool RequiresNullTerminator) const { diff --git a/llvm/lib/CAS/OnDiskCAS.cpp b/llvm/lib/CAS/OnDiskCAS.cpp new file mode 100644 index 0000000..7d29f44 --- /dev/null +++ b/llvm/lib/CAS/OnDiskCAS.cpp @@ -0,0 +1,211 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BuiltinCAS.h" +#include "llvm/CAS/BuiltinCASContext.h" +#include "llvm/CAS/BuiltinObjectHasher.h" +#include "llvm/CAS/OnDiskGraphDB.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Error.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::builtin; + +namespace { + +class OnDiskCAS : public BuiltinCAS { +public: +  Expected<ObjectRef> storeImpl(ArrayRef<uint8_t> ComputedHash, +                                ArrayRef<ObjectRef> Refs, +                                ArrayRef<char> Data) final; + +  Expected<std::optional<ObjectHandle>> loadIfExists(ObjectRef Ref) final; + +  CASID getID(ObjectRef Ref) const final; + +  std::optional<ObjectRef> getReference(const CASID &ID) const final; + +  Expected<bool> isMaterialized(ObjectRef Ref) const final; + +  ArrayRef<char> getDataConst(ObjectHandle Node) const final; + +  void print(raw_ostream &OS) const final; +  Error validate(bool CheckHash) const final; + +  static Expected<std::unique_ptr<OnDiskCAS>> open(StringRef Path); + +  OnDiskCAS(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) +      : UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {} + +private: +  ObjectHandle convertHandle(ondisk::ObjectHandle Node) const { +    return makeObjectHandle(Node.getOpaqueData()); +  } + +  ondisk::ObjectHandle convertHandle(ObjectHandle Node) const { +    return ondisk::ObjectHandle(Node.getInternalRef(*this)); +  } + +  ObjectRef convertRef(ondisk::ObjectID Ref) const { +    return makeObjectRef(Ref.getOpaqueData()); +  } + +  ondisk::ObjectID convertRef(ObjectRef Ref) const { +    return ondisk::ObjectID::fromOpaqueData(Ref.getInternalRef(*this)); +  } + +  size_t getNumRefs(ObjectHandle Node) const final { +    auto RefsRange = DB->getObjectRefs(convertHandle(Node)); + 
   return std::distance(RefsRange.begin(), RefsRange.end()); +  } + +  ObjectRef readRef(ObjectHandle Node, size_t I) const final { +    auto RefsRange = DB->getObjectRefs(convertHandle(Node)); +    return convertRef(RefsRange.begin()[I]); +  } + +  Error forEachRef(ObjectHandle Node, +                   function_ref<Error(ObjectRef)> Callback) const final; + +  Error setSizeLimit(std::optional<uint64_t> SizeLimit) final; +  Expected<std::optional<uint64_t>> getStorageSize() const final; +  Error pruneStorageData() final; + +  OnDiskCAS(std::unique_ptr<ondisk::OnDiskGraphDB> GraphDB) +      : OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {} + +  std::unique_ptr<ondisk::OnDiskGraphDB> OwnedDB; +  std::shared_ptr<ondisk::UnifiedOnDiskCache> UnifiedDB; +  ondisk::OnDiskGraphDB *DB; +}; + +} // end anonymous namespace + +void OnDiskCAS::print(raw_ostream &OS) const { DB->print(OS); } +Error OnDiskCAS::validate(bool CheckHash) const { +  auto Hasher = [](ArrayRef<ArrayRef<uint8_t>> Refs, ArrayRef<char> Data, +                   SmallVectorImpl<uint8_t> &Result) { +    auto Hash = BuiltinObjectHasher<llvm::cas::builtin::HasherT>::hashObject( +        Refs, Data); +    Result.assign(Hash.begin(), Hash.end()); +  }; + +  if (auto E = DB->validate(CheckHash, Hasher)) +    return E; + +  return Error::success(); +} + +CASID OnDiskCAS::getID(ObjectRef Ref) const { +  ArrayRef<uint8_t> Hash = DB->getDigest(convertRef(Ref)); +  return CASID::create(&getContext(), toStringRef(Hash)); +} + +std::optional<ObjectRef> OnDiskCAS::getReference(const CASID &ID) const { +  std::optional<ondisk::ObjectID> ObjID = +      DB->getExistingReference(ID.getHash()); +  if (!ObjID) +    return std::nullopt; +  return convertRef(*ObjID); +} + +Expected<bool> OnDiskCAS::isMaterialized(ObjectRef ExternalRef) const { +  return DB->isMaterialized(convertRef(ExternalRef)); +} + +ArrayRef<char> OnDiskCAS::getDataConst(ObjectHandle Node) const { +  return DB->getObjectData(convertHandle(Node)); +} + 
+Expected<std::optional<ObjectHandle>> +OnDiskCAS::loadIfExists(ObjectRef ExternalRef) { +  Expected<std::optional<ondisk::ObjectHandle>> ObjHnd = +      DB->load(convertRef(ExternalRef)); +  if (!ObjHnd) +    return ObjHnd.takeError(); +  if (!*ObjHnd) +    return std::nullopt; +  return convertHandle(**ObjHnd); +} + +Expected<ObjectRef> OnDiskCAS::storeImpl(ArrayRef<uint8_t> ComputedHash, +                                         ArrayRef<ObjectRef> Refs, +                                         ArrayRef<char> Data) { +  SmallVector<ondisk::ObjectID, 64> IDs; +  IDs.reserve(Refs.size()); +  for (ObjectRef Ref : Refs) { +    IDs.push_back(convertRef(Ref)); +  } + +  auto StoredID = DB->getReference(ComputedHash); +  if (LLVM_UNLIKELY(!StoredID)) +    return StoredID.takeError(); +  if (Error E = DB->store(*StoredID, IDs, Data)) +    return std::move(E); +  return convertRef(*StoredID); +} + +Error OnDiskCAS::forEachRef(ObjectHandle Node, +                            function_ref<Error(ObjectRef)> Callback) const { +  auto RefsRange = DB->getObjectRefs(convertHandle(Node)); +  for (ondisk::ObjectID Ref : RefsRange) { +    if (Error E = Callback(convertRef(Ref))) +      return E; +  } +  return Error::success(); +} + +Error OnDiskCAS::setSizeLimit(std::optional<uint64_t> SizeLimit) { +  UnifiedDB->setSizeLimit(SizeLimit); +  return Error::success(); +} + +Expected<std::optional<uint64_t>> OnDiskCAS::getStorageSize() const { +  return UnifiedDB->getStorageSize(); +} + +Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); } + +Expected<std::unique_ptr<OnDiskCAS>> OnDiskCAS::open(StringRef AbsPath) { +  Expected<std::unique_ptr<ondisk::OnDiskGraphDB>> DB = +      ondisk::OnDiskGraphDB::open(AbsPath, BuiltinCASContext::getHashName(), +                                  sizeof(HashType)); +  if (!DB) +    return DB.takeError(); +  return std::unique_ptr<OnDiskCAS>(new OnDiskCAS(std::move(*DB))); +} + +bool cas::isOnDiskCASEnabled() { +#if 
LLVM_ENABLE_ONDISK_CAS +  return true; +#else +  return false; +#endif +} + +Expected<std::unique_ptr<ObjectStore>> cas::createOnDiskCAS(const Twine &Path) { +#if LLVM_ENABLE_ONDISK_CAS +  // FIXME: An absolute path isn't really good enough. Should open a directory +  // and use openat() for files underneath. +  SmallString<256> AbsPath; +  Path.toVector(AbsPath); +  sys::fs::make_absolute(AbsPath); + +  return OnDiskCAS::open(AbsPath); +#else +  return createStringError(inconvertibleErrorCode(), "OnDiskCAS is disabled"); +#endif /* LLVM_ENABLE_ONDISK_CAS */ +} + +std::unique_ptr<ObjectStore> +cas::builtin::createObjectStoreFromUnifiedOnDiskCache( +    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) { +  return std::make_unique<OnDiskCAS>(std::move(UniDB)); +} diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp index 64cbe9d..245b6fb 100644 --- a/llvm/lib/CAS/OnDiskGraphDB.cpp +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -893,6 +893,10 @@ int64_t DataRecordHandle::getDataRelOffset() const {  }  Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { +  if (UpstreamDB) { +    if (auto E = UpstreamDB->validate(Deep, Hasher)) +      return E; +  }    return Index.validate([&](FileOffset Offset,                              OnDiskTrieRawHashMap::ConstValueProxy Record)                              -> Error { @@ -1202,11 +1206,8 @@ OnDiskGraphDB::load(ObjectID ExternalRef) {      return I.takeError();    TrieRecord::Data Object = I->Ref.load(); -  if (Object.SK == TrieRecord::StorageKind::Unknown) { -    if (!UpstreamDB) -      return std::nullopt; +  if (Object.SK == TrieRecord::StorageKind::Unknown)      return faultInFromUpstream(ExternalRef); -  }    if (Object.SK == TrieRecord::StorageKind::DataPool)      return ObjectHandle::fromFileOffset(Object.Offset); @@ -1286,8 +1287,10 @@ OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,    TrieRecord::Data Object = I->Ref.load();    if (Object.SK != TrieRecord::StorageKind::Unknown) 
     return ObjectPresence::InPrimaryDB; +    if (!CheckUpstream || !UpstreamDB)      return ObjectPresence::Missing; +    std::optional<ObjectID> UpstreamID =        UpstreamDB->getExistingReference(getDigest(*I));    return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB @@ -1549,9 +1552,10 @@ unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const {    return std::max(IndexPercent, DataPercent);  } -Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open( -    StringRef AbsPath, StringRef HashName, unsigned HashByteSize, -    std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy) { +Expected<std::unique_ptr<OnDiskGraphDB>> +OnDiskGraphDB::open(StringRef AbsPath, StringRef HashName, +                    unsigned HashByteSize, OnDiskGraphDB *UpstreamDB, +                    FaultInPolicy Policy) {    if (std::error_code EC = sys::fs::create_directories(AbsPath))      return createFileError(AbsPath, EC); @@ -1604,18 +1608,15 @@ Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(                               "unexpected user header in '" + DataPoolPath +                                   "'"); -  return std::unique_ptr<OnDiskGraphDB>( -      new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool), -                        std::move(UpstreamDB), Policy)); +  return std::unique_ptr<OnDiskGraphDB>(new OnDiskGraphDB( +      AbsPath, std::move(*Index), std::move(*DataPool), UpstreamDB, Policy));  }  OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,                               OnDiskDataAllocator DataPool, -                             std::unique_ptr<OnDiskGraphDB> UpstreamDB, -                             FaultInPolicy Policy) +                             OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy)      : Index(std::move(Index)), DataPool(std::move(DataPool)), -      RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)), -      FIPolicy(Policy) { +      
RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy) {    /// Lifetime for "big" objects not in DataPool.    ///    /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something @@ -1638,7 +1639,6 @@ Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,    // against the process dying during importing and leaving the database with an    // incomplete tree. Note that if the upstream has missing nodes then the tree    // will be copied with missing nodes as well, it won't be considered an error. -    struct UpstreamCursor {      ObjectHandle Node;      size_t RefsCount; @@ -1720,7 +1720,6 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,    // Copy the node data into the primary store.    // FIXME: Use hard-link or cloning if the file-system supports it and data is    // stored into a separate file. -    auto Data = UpstreamDB->getObjectData(UpstreamNode);    auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);    SmallVector<ObjectID, 64> Refs; @@ -1737,7 +1736,8 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,  Expected<std::optional<ObjectHandle>>  OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) { -  assert(UpstreamDB); +  if (!UpstreamDB) +    return std::nullopt;    auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));    if (LLVM_UNLIKELY(!UpstreamID)) diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp index 2186071..15656cb 100644 --- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp +++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp @@ -20,6 +20,7 @@  #include "llvm/CAS/OnDiskKeyValueDB.h"  #include "OnDiskCommon.h"  #include "llvm/ADT/StringExtras.h" +#include "llvm/CAS/UnifiedOnDiskCache.h"  #include "llvm/Support/Alignment.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Errc.h" @@ -53,15 +54,21 @@ Expected<std::optional<ArrayRef<char>>>  OnDiskKeyValueDB::get(ArrayRef<uint8_t> Key) {    // Check the result cache.    
OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key); -  if (!ActionP) +  if (ActionP) { +    assert(isAddrAligned(Align(8), ActionP->Data.data())); +    return ActionP->Data; +  } +  if (!UnifiedCache || !UnifiedCache->UpstreamKVDB)      return std::nullopt; -  assert(isAddrAligned(Align(8), ActionP->Data.data())); -  return ActionP->Data; + +  // Try to fault in from upstream. +  return UnifiedCache->faultInFromUpstreamKV(Key);  }  Expected<std::unique_ptr<OnDiskKeyValueDB>>  OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, -                       StringRef ValueName, size_t ValueSize) { +                       StringRef ValueName, size_t ValueSize, +                       UnifiedOnDiskCache *Cache) {    if (std::error_code EC = sys::fs::create_directories(Path))      return createFileError(Path, EC); @@ -87,10 +94,14 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,      return std::move(E);    return std::unique_ptr<OnDiskKeyValueDB>( -      new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache))); +      new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache), Cache));  }  Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const { +  if (UnifiedCache && UnifiedCache->UpstreamKVDB) { +    if (auto E = UnifiedCache->UpstreamKVDB->validate(CheckValue)) +      return E; +  }    return Cache.validate(        [&](FileOffset Offset,            OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error { diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp new file mode 100644 index 0000000..ae9d818 --- /dev/null +++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp @@ -0,0 +1,613 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one +/// directory while also restricting storage growth with a scheme of chaining +/// the two most recent directories (primary & upstream), where the primary +/// "faults-in" data from the upstream one. When the primary (most recent) +/// directory exceeds its intended limit a new empty directory becomes the +/// primary one. +/// +/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open +/// receives) there are directories named like this: +/// +/// 'v<version>.<x>' +/// 'v<version>.<x+1>' +/// 'v<version>.<x+2>' +/// ... +/// +/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and +/// the part after the dot is an increasing integer. The primary directory is +/// the one with the highest integer and the upstream one is the directory +/// before it. For example, if the sub-directories contained are: +/// +/// 'v1.5', 'v1.6', 'v1.7', 'v1.8' +/// +/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are +/// unused directories that can be safely deleted at any time and by any +/// process. +/// +/// Contained within the top-level directory is a file named "lock" which is +/// used for processes to take shared or exclusive locks for the contents of the +/// top directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock +/// for the top-level directory; when it closes, if the primary sub-directory +/// exceeded its limit, it attempts to get an exclusive lock in order to create +/// a new empty primary directory; if it can't get the exclusive lock it gives +/// up and lets the next \p UnifiedOnDiskCache instance that closes attempt +/// again. 
+/// +/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a +/// directory, by any process, the storage size in that directory will keep +/// growing unrestricted. But the major benefit is that garbage-collection can +/// be triggered on a directory concurrently, at any time and by any process, +/// without affecting any active readers/writers in the same process or other +/// processes. +/// +/// The \c UnifiedOnDiskCache also provides validation and recovery on top of +/// the underlying on-disk storage. The low-level storage is designed to remain +/// coherent across regular process crashes, but may be invalid after power loss +/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows +/// validating the contents once per boot and can recover by marking invalid +/// data for garbage collection. +/// +/// The data recovery described above requires exclusive access to the CAS, and +/// it is an error to attempt recovery if the CAS is open in any process/thread. +/// In order to maximize backwards compatibility with tools that do not perform +/// validation before opening the CAS, we do not attempt to get exclusive access +/// until recovery is actually performed, meaning as long as the data is valid +/// it will not conflict with concurrent use. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/UnifiedOnDiskCache.h" +#include "BuiltinCAS.h" +#include "OnDiskCommon.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CAS/ActionCache.h" +#include "llvm/CAS/OnDiskGraphDB.h" +#include "llvm/CAS/OnDiskKeyValueDB.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include <optional> + +#if __has_include(<sys/sysctl.h>) +#include <sys/sysctl.h> +#endif + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +/// FIXME: When the version of \p DBDirPrefix is bumped up we need to figure out +/// how to handle the leftover sub-directories of the previous version, within +/// the \p UnifiedOnDiskCache::collectGarbage function. +static constexpr StringLiteral DBDirPrefix = "v1."; + +static constexpr StringLiteral ValidationFilename = "v1.validation"; +static constexpr StringLiteral CorruptPrefix = "corrupt."; + +ObjectID UnifiedOnDiskCache::getObjectIDFromValue(ArrayRef<char> Value) { +  // little endian encoded. +  assert(Value.size() == sizeof(uint64_t)); +  return ObjectID::fromOpaqueData(support::endian::read64le(Value.data())); +} + +UnifiedOnDiskCache::ValueBytes +UnifiedOnDiskCache::getValueFromObjectID(ObjectID ID) { +  // little endian encoded. 
+  UnifiedOnDiskCache::ValueBytes ValBytes; +  static_assert(ValBytes.size() == sizeof(ID.getOpaqueData())); +  support::endian::write64le(ValBytes.data(), ID.getOpaqueData()); +  return ValBytes; +} + +Expected<std::optional<ArrayRef<char>>> +UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef<uint8_t> Key) { +  assert(UpstreamGraphDB); +  assert(UpstreamKVDB); + +  std::optional<ArrayRef<char>> UpstreamValue; +  if (Error E = UpstreamKVDB->get(Key).moveInto(UpstreamValue)) +    return std::move(E); +  if (!UpstreamValue) +    return std::nullopt; + +  // The value is the \p ObjectID in the context of the upstream +  // \p OnDiskGraphDB instance. Translate it to the context of the primary +  // \p OnDiskGraphDB instance. +  ObjectID UpstreamID = getObjectIDFromValue(*UpstreamValue); +  auto PrimaryID = +      PrimaryGraphDB->getReference(UpstreamGraphDB->getDigest(UpstreamID)); +  if (LLVM_UNLIKELY(!PrimaryID)) +    return PrimaryID.takeError(); +  return PrimaryKVDB->put(Key, getValueFromObjectID(*PrimaryID)); +} + +/// \returns all the 'v<version>.<x>' names of sub-directories, sorted with +/// ascending order of the integer after the dot. Corrupt directories, if +/// included, will come first. 
+static Expected<SmallVector<std::string, 4>> +getAllDBDirs(StringRef Path, bool IncludeCorrupt = false) { +  struct DBDir { +    uint64_t Order; +    std::string Name; +  }; +  SmallVector<DBDir> FoundDBDirs; + +  std::error_code EC; +  for (sys::fs::directory_iterator DirI(Path, EC), DirE; !EC && DirI != DirE; +       DirI.increment(EC)) { +    if (DirI->type() != sys::fs::file_type::directory_file) +      continue; +    StringRef SubDir = sys::path::filename(DirI->path()); +    if (IncludeCorrupt && SubDir.starts_with(CorruptPrefix)) { +      FoundDBDirs.push_back({0, std::string(SubDir)}); +      continue; +    } +    if (!SubDir.starts_with(DBDirPrefix)) +      continue; +    uint64_t Order; +    if (SubDir.substr(DBDirPrefix.size()).getAsInteger(10, Order)) +      return createStringError(inconvertibleErrorCode(), +                               "unexpected directory " + DirI->path()); +    FoundDBDirs.push_back({Order, std::string(SubDir)}); +  } +  if (EC) +    return createFileError(Path, EC); + +  llvm::sort(FoundDBDirs, [](const DBDir &LHS, const DBDir &RHS) -> bool { +    return LHS.Order <= RHS.Order; +  }); + +  SmallVector<std::string, 4> DBDirs; +  for (DBDir &Dir : FoundDBDirs) +    DBDirs.push_back(std::move(Dir.Name)); +  return DBDirs; +} + +static Expected<SmallVector<std::string, 4>> getAllGarbageDirs(StringRef Path) { +  auto DBDirs = getAllDBDirs(Path, /*IncludeCorrupt=*/true); +  if (!DBDirs) +    return DBDirs.takeError(); + +  // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure +  // out how to handle the leftover sub-directories of the previous version. + +  for (unsigned Keep = 2; Keep > 0 && !DBDirs->empty(); --Keep) { +    StringRef Back(DBDirs->back()); +    if (Back.starts_with(CorruptPrefix)) +      break; +    DBDirs->pop_back(); +  } +  return *DBDirs; +} + +/// \returns Given a sub-directory named 'v<version>.<x>', it outputs the +/// 'v<version>.<x+1>' name. 
+static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) { +  assert(DBDir.starts_with(DBDirPrefix)); +  uint64_t Count; +  bool Failed = DBDir.substr(DBDirPrefix.size()).getAsInteger(10, Count); +  assert(!Failed); +  (void)Failed; +  OS << DBDirPrefix << Count + 1; +} + +static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath, +                                  bool CheckHash) { +  SmallVector<StringRef> Args{LLVMCasBinary, "-cas", RootPath, "-validate"}; +  if (CheckHash) +    Args.push_back("-check-hash"); + +  llvm::SmallString<128> StdErrPath; +  int StdErrFD = -1; +  if (std::error_code EC = sys::fs::createTemporaryFile( +          "llvm-cas-validate-stderr", "txt", StdErrFD, StdErrPath, +          llvm::sys::fs::OF_Text)) +    return createStringError(EC, "failed to create temporary file"); +  FileRemover OutputRemover(StdErrPath.c_str()); + +  std::optional<llvm::StringRef> Redirects[] = { +      {""}, // stdin = /dev/null +      {""}, // stdout = /dev/null +      StdErrPath.str(), +  }; + +  std::string ErrMsg; +  int Result = +      sys::ExecuteAndWait(LLVMCasBinary, Args, /*Env=*/std::nullopt, Redirects, +                          /*SecondsToWait=*/120, /*MemoryLimit=*/0, &ErrMsg); + +  if (Result == -1) +    return createStringError("failed to exec " + join(Args, " ") + ": " + +                             ErrMsg); +  if (Result != 0) { +    llvm::SmallString<64> Err("cas contents invalid"); +    if (!ErrMsg.empty()) { +      Err += ": "; +      Err += ErrMsg; +    } +    auto StdErrBuf = MemoryBuffer::getFile(StdErrPath.c_str()); +    if (StdErrBuf && !(*StdErrBuf)->getBuffer().empty()) { +      Err += ": "; +      Err += (*StdErrBuf)->getBuffer(); +    } +    return createStringError(Err); +  } +  return Error::success(); +} + +static Error validateInProcess(StringRef RootPath, StringRef HashName, +                               unsigned HashByteSize, bool CheckHash) { +  std::shared_ptr<UnifiedOnDiskCache> UniDB; + 
 if (Error E = UnifiedOnDiskCache::open(RootPath, std::nullopt, HashName, +                                         HashByteSize) +                    .moveInto(UniDB)) +    return E; +  auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB); +  if (Error E = CAS->validate(CheckHash)) +    return E; +  auto Cache = builtin::createActionCacheFromUnifiedOnDiskCache(UniDB); +  if (Error E = Cache->validate()) +    return E; +  return Error::success(); +} + +static Expected<uint64_t> getBootTime() { +#if __has_include(<sys/sysctl.h>) && defined(KERN_BOOTTIME) +  struct timeval TV; +  size_t TVLen = sizeof(TV); +  int KernBoot[2] = {CTL_KERN, KERN_BOOTTIME}; +  if (sysctl(KernBoot, 2, &TV, &TVLen, nullptr, 0) < 0) +    return createStringError(llvm::errnoAsErrorCode(), +                             "failed to get boottime"); +  if (TVLen != sizeof(TV)) +    return createStringError("sysctl kern.boottime unexpected format"); +  return TV.tv_sec; +#elif defined(__linux__) +  // Use the mtime for /proc, which is recreated during system boot. +  // We could also read /proc/stat and search for 'btime'. 
+  sys::fs::file_status Status; +  if (std::error_code EC = sys::fs::status("/proc", Status)) +    return createFileError("/proc", EC); +  return Status.getLastModificationTime().time_since_epoch().count(); +#else +  llvm::report_fatal_error("getBootTime unimplemented"); +#endif +} + +Expected<ValidationResult> UnifiedOnDiskCache::validateIfNeeded( +    StringRef RootPath, StringRef HashName, unsigned HashByteSize, +    bool CheckHash, bool AllowRecovery, bool ForceValidation, +    std::optional<StringRef> LLVMCasBinaryPath) { +  if (std::error_code EC = sys::fs::create_directories(RootPath)) +    return createFileError(RootPath, EC); + +  SmallString<256> PathBuf(RootPath); +  sys::path::append(PathBuf, ValidationFilename); +  int FD = -1; +  if (std::error_code EC = sys::fs::openFileForReadWrite( +          PathBuf, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) +    return createFileError(PathBuf, EC); +  assert(FD != -1); + +  sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); +  auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(File); }); + +  if (std::error_code EC = lockFileThreadSafe(FD, sys::fs::LockKind::Exclusive)) +    return createFileError(PathBuf, EC); +  auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(FD); }); + +  SmallString<8> Bytes; +  if (Error E = sys::fs::readNativeFileToEOF(File, Bytes)) +    return createFileError(PathBuf, std::move(E)); + +  uint64_t ValidationBootTime = 0; +  if (!Bytes.empty() && +      StringRef(Bytes).trim().getAsInteger(10, ValidationBootTime)) +    return createFileError(PathBuf, errc::illegal_byte_sequence, +                           "expected integer"); + +  static uint64_t BootTime = 0; +  if (BootTime == 0) +    if (Error E = getBootTime().moveInto(BootTime)) +      return std::move(E); + +  std::string LogValidationError; + +  if (ValidationBootTime == BootTime && !ForceValidation) +    return ValidationResult::Skipped; + +  // Validate! 
+  bool NeedsRecovery = false; +  if (Error E = +          LLVMCasBinaryPath +              ? validateOutOfProcess(*LLVMCasBinaryPath, RootPath, CheckHash) +              : validateInProcess(RootPath, HashName, HashByteSize, +                                  CheckHash)) { +    if (AllowRecovery) { +      consumeError(std::move(E)); +      NeedsRecovery = true; +    } else { +      return std::move(E); +    } +  } + +  if (NeedsRecovery) { +    sys::path::remove_filename(PathBuf); +    sys::path::append(PathBuf, "lock"); + +    int LockFD = -1; +    if (std::error_code EC = sys::fs::openFileForReadWrite( +            PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) +      return createFileError(PathBuf, EC); +    sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD); +    auto CloseLock = make_scope_exit([&]() { sys::fs::closeFile(LockFile); }); +    if (std::error_code EC = tryLockFileThreadSafe(LockFD)) { +      if (EC == std::errc::no_lock_available) +        return createFileError( +            PathBuf, EC, +            "CAS validation requires exclusive access but CAS was in use"); +      return createFileError(PathBuf, EC); +    } +    auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); }); + +    auto DBDirs = getAllDBDirs(RootPath); +    if (!DBDirs) +      return DBDirs.takeError(); + +    for (StringRef DBDir : *DBDirs) { +      sys::path::remove_filename(PathBuf); +      sys::path::append(PathBuf, DBDir); +      std::error_code EC; +      int Attempt = 0, MaxAttempts = 100; +      SmallString<128> GCPath; +      for (; Attempt < MaxAttempts; ++Attempt) { +        GCPath.assign(RootPath); +        sys::path::append(GCPath, CorruptPrefix + std::to_string(Attempt) + +                                      "." + DBDir); +        EC = sys::fs::rename(PathBuf, GCPath); +        // Darwin uses ENOTEMPTY. Linux may return either ENOTEMPTY or EEXIST. 
+        if (EC != errc::directory_not_empty && EC != errc::file_exists) +          break; +      } +      if (Attempt == MaxAttempts) +        return createStringError( +            EC, "rename " + PathBuf + +                    " failed: too many CAS directories awaiting pruning"); +      if (EC) +        return createStringError(EC, "rename " + PathBuf + " to " + GCPath + +                                         " failed: " + EC.message()); +    } +  } + +  if (ValidationBootTime != BootTime) { +    // Fix filename in case we have error to report. +    sys::path::remove_filename(PathBuf); +    sys::path::append(PathBuf, ValidationFilename); +    if (std::error_code EC = sys::fs::resize_file(FD, 0)) +      return createFileError(PathBuf, EC); +    raw_fd_ostream OS(FD, /*shouldClose=*/false); +    OS.seek(0); // resize does not reset position +    OS << BootTime << '\n'; +    if (OS.has_error()) +      return createFileError(PathBuf, OS.error()); +  } + +  return NeedsRecovery ? ValidationResult::Recovered : ValidationResult::Valid; +} + +Expected<std::unique_ptr<UnifiedOnDiskCache>> +UnifiedOnDiskCache::open(StringRef RootPath, std::optional<uint64_t> SizeLimit, +                         StringRef HashName, unsigned HashByteSize, +                         OnDiskGraphDB::FaultInPolicy FaultInPolicy) { +  if (std::error_code EC = sys::fs::create_directories(RootPath)) +    return createFileError(RootPath, EC); + +  SmallString<256> PathBuf(RootPath); +  sys::path::append(PathBuf, "lock"); +  int LockFD = -1; +  if (std::error_code EC = sys::fs::openFileForReadWrite( +          PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) +    return createFileError(PathBuf, EC); +  assert(LockFD != -1); +  // Locking the directory using shared lock, which will prevent other processes +  // from creating a new chain (essentially while a \p UnifiedOnDiskCache +  // instance holds a shared lock the storage for the primary directory will +  // grow unrestricted). 
+  if (std::error_code EC = +          lockFileThreadSafe(LockFD, sys::fs::LockKind::Shared)) +    return createFileError(PathBuf, EC); + +  auto DBDirs = getAllDBDirs(RootPath); +  if (!DBDirs) +    return DBDirs.takeError(); +  if (DBDirs->empty()) +    DBDirs->push_back((Twine(DBDirPrefix) + "1").str()); + +  assert(!DBDirs->empty()); + +  /// If there is only one directory open databases on it. If there are 2 or +  /// more directories, get the most recent directories and chain them, with the +  /// most recent being the primary one. The remaining directories are unused +  /// data than can be garbage-collected. +  auto UniDB = std::unique_ptr<UnifiedOnDiskCache>(new UnifiedOnDiskCache()); +  std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB; +  std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB; +  if (DBDirs->size() > 1) { +    StringRef UpstreamDir = *(DBDirs->end() - 2); +    PathBuf = RootPath; +    sys::path::append(PathBuf, UpstreamDir); +    if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize, +                                      /*UpstreamDB=*/nullptr, FaultInPolicy) +                      .moveInto(UpstreamGraphDB)) +      return std::move(E); +    if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize, +                                         /*ValueName=*/"objectid", +                                         /*ValueSize=*/sizeof(uint64_t)) +                      .moveInto(UpstreamKVDB)) +      return std::move(E); +  } + +  StringRef PrimaryDir = *(DBDirs->end() - 1); +  PathBuf = RootPath; +  sys::path::append(PathBuf, PrimaryDir); +  std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB; +  if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize, +                                    UpstreamGraphDB.get(), FaultInPolicy) +                    .moveInto(PrimaryGraphDB)) +    return std::move(E); +  std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB; +  // \p UnifiedOnDiskCache does manual chaining for key-value requests, +  // 
including an extra translation step of the value during fault-in. +  if (Error E = +          OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize, +                                 /*ValueName=*/"objectid", +                                 /*ValueSize=*/sizeof(uint64_t), UniDB.get()) +              .moveInto(PrimaryKVDB)) +    return std::move(E); + +  UniDB->RootPath = RootPath; +  UniDB->SizeLimit = SizeLimit.value_or(0); +  UniDB->LockFD = LockFD; +  UniDB->NeedsGarbageCollection = DBDirs->size() > 2; +  UniDB->PrimaryDBDir = PrimaryDir; +  UniDB->UpstreamGraphDB = std::move(UpstreamGraphDB); +  UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB); +  UniDB->UpstreamKVDB = std::move(UpstreamKVDB); +  UniDB->PrimaryKVDB = std::move(PrimaryKVDB); + +  return std::move(UniDB); +} + +void UnifiedOnDiskCache::setSizeLimit(std::optional<uint64_t> SizeLimit) { +  this->SizeLimit = SizeLimit.value_or(0); +} + +uint64_t UnifiedOnDiskCache::getStorageSize() const { +  uint64_t TotalSize = getPrimaryStorageSize(); +  if (UpstreamGraphDB) +    TotalSize += UpstreamGraphDB->getStorageSize(); +  if (UpstreamKVDB) +    TotalSize += UpstreamKVDB->getStorageSize(); +  return TotalSize; +} + +uint64_t UnifiedOnDiskCache::getPrimaryStorageSize() const { +  return PrimaryGraphDB->getStorageSize() + PrimaryKVDB->getStorageSize(); +} + +bool UnifiedOnDiskCache::hasExceededSizeLimit() const { +  uint64_t CurSizeLimit = SizeLimit; +  if (!CurSizeLimit) +    return false; + +  // If the hard limit is beyond 85%, declare above limit and request clean up. +  unsigned CurrentPercent = +      std::max(PrimaryGraphDB->getHardStorageLimitUtilization(), +               PrimaryKVDB->getHardStorageLimitUtilization()); +  if (CurrentPercent > 85) +    return true; + +  // We allow each of the directories in the chain to reach up to half the +  // intended size limit. 
Check whether the primary directory has exceeded half +  // the limit or not, in order to decide whether we need to start a new chain. +  // +  // We could check the size limit against the sum of sizes of both the primary +  // and upstream directories but then if the upstream is significantly larger +  // than the intended limit, it would trigger a new chain to be created before +  // the primary has reached its own limit. Essentially in such situation we +  // prefer reclaiming the storage later in order to have more consistent cache +  // hits behavior. +  return (CurSizeLimit / 2) < getPrimaryStorageSize(); +} + +Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { +  if (LockFD == -1) +    return Error::success(); // already closed. +  auto CloseLock = make_scope_exit([&]() { +    assert(LockFD >= 0); +    sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD); +    sys::fs::closeFile(LockFile); +    LockFD = -1; +  }); + +  bool ExceededSizeLimit = CheckSizeLimit ? hasExceededSizeLimit() : false; +  UpstreamKVDB.reset(); +  PrimaryKVDB.reset(); +  UpstreamGraphDB.reset(); +  PrimaryGraphDB.reset(); +  if (std::error_code EC = unlockFileThreadSafe(LockFD)) +    return createFileError(RootPath, EC); + +  if (!ExceededSizeLimit) +    return Error::success(); + +  // The primary directory exceeded its intended size limit. Try to get an +  // exclusive lock in order to create a new primary directory for next time +  // this \p UnifiedOnDiskCache path is opened. + +  if (std::error_code EC = tryLockFileThreadSafe( +          LockFD, std::chrono::milliseconds(0), sys::fs::LockKind::Exclusive)) { +    if (EC == errc::no_lock_available) +      return Error::success(); // couldn't get exclusive lock, give up. 
+    return createFileError(RootPath, EC); +  } +  auto UnlockFile = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); }); + +  // Managed to get an exclusive lock which means there are no other open +  // \p UnifiedOnDiskCache instances for the same path, so we can safely start a +  // new primary directory. To start a new primary directory we just have to +  // create a new empty directory with the next consecutive index; since this is +  // an atomic operation we will leave the top-level directory in a consistent +  // state even if the process dies during this code-path. + +  SmallString<256> PathBuf(RootPath); +  raw_svector_ostream OS(PathBuf); +  OS << sys::path::get_separator(); +  getNextDBDirName(PrimaryDBDir, OS); +  if (std::error_code EC = sys::fs::create_directory(PathBuf)) +    return createFileError(PathBuf, EC); + +  NeedsGarbageCollection = true; +  return Error::success(); +} + +UnifiedOnDiskCache::UnifiedOnDiskCache() = default; + +UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(close()); } + +Error UnifiedOnDiskCache::collectGarbage(StringRef Path) { +  auto DBDirs = getAllGarbageDirs(Path); +  if (!DBDirs) +    return DBDirs.takeError(); + +  SmallString<256> PathBuf(Path); +  for (StringRef UnusedSubDir : *DBDirs) { +    sys::path::append(PathBuf, UnusedSubDir); +    if (std::error_code EC = sys::fs::remove_directories(PathBuf)) +      return createFileError(PathBuf, EC); +    sys::path::remove_filename(PathBuf); +  } +  return Error::success(); +} + +Error UnifiedOnDiskCache::collectGarbage() { return collectGarbage(RootPath); } diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index e5c85d5..1ea30d8 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -745,11 +745,6 @@ void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const {    Asm->emitInt32(QualifiedNameHash);  } -constexpr AppleAccelTableData::Atom 
AppleAccelTableTypeData::Atoms[]; -constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[]; -constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[]; -constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[]; -  #ifndef NDEBUG  void AppleAccelTableWriter::Header::print(raw_ostream &OS) const {    OS << "Magic: " << format("0x%x", Magic) << "\n" diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 4b4df98..637acd6 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -109,8 +109,10 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,    if (auto *CI = dyn_cast<ConstantInt>(NumericConstant)) {      if (CI->getBitWidth() > 64)        MIB.addCImm(CI); -    else +    else if (CI->getBitWidth() == 1)        MIB.addImm(CI->getZExtValue()); +    else +      MIB.addImm(CI->getSExtValue());    } else if (auto *CFP = dyn_cast<ConstantFP>(NumericConstant)) {      MIB.addFPImm(CFP);    } else if (isa<ConstantPointerNull>(NumericConstant)) { diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index bb10cf6..d84c3fb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -733,6 +733,8 @@ MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) {    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {      if (CI->getBitWidth() > 64)        return MachineOperand::CreateCImm(CI); +    if (CI->getBitWidth() == 1) +      return MachineOperand::CreateImm(CI->getZExtValue());      return MachineOperand::CreateImm(CI->getSExtValue());    }    if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index a522650..fa0c899 100644 --- 
a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8958,9 +8958,8 @@ bool SelectionDAGBuilder::canTailCall(const CallBase &CB) const {    // Avoid emitting tail calls in functions with the disable-tail-calls    // attribute.    const Function *Caller = CB.getParent()->getParent(); -  if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == -          "true" && -      !isMustTailCall) +  if (!isMustTailCall && +      Caller->getFnAttribute("disable-tail-calls").getValueAsBool())      return false;    // We can't tail call inside a function with a swifterror argument. Lowering diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h index 84757ae..970abdc 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h @@ -28,7 +28,7 @@ using MacroOffset2UnitMapTy = DenseMap<uint64_t, DwarfUnit *>;  /// Base class for all Dwarf units(Compile unit/Type table unit).  
class DwarfUnit : public OutputSections {  public: -  virtual ~DwarfUnit() {} +  virtual ~DwarfUnit() = default;    DwarfUnit(LinkingGlobalData &GlobalData, unsigned ID,              StringRef ClangModuleName)        : OutputSections(GlobalData), ID(ID), ClangModuleName(ClangModuleName), diff --git a/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h b/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h index f67536e..8ccb4a5 100644 --- a/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h +++ b/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h @@ -22,7 +22,7 @@ class StringEntryToDwarfStringPoolEntryMap {  public:    StringEntryToDwarfStringPoolEntryMap(LinkingGlobalData &GlobalData)        : GlobalData(GlobalData) {} -  ~StringEntryToDwarfStringPoolEntryMap() {} +  ~StringEntryToDwarfStringPoolEntryMap() = default;    /// Create DwarfStringPoolEntry for specified StringEntry if necessary.    /// Initialize DwarfStringPoolEntry with initial values. 
diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index 7e606c6a..4e7db82 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -27,7 +27,7 @@  namespace llvm {  namespace orc { -MemoryMapper::~MemoryMapper() {} +MemoryMapper::~MemoryMapper() = default;  InProcessMemoryMapper::InProcessMemoryMapper(size_t PageSize)      : PageSize(PageSize) {} diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 0e5926f..fff9a81 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -528,7 +528,7 @@ void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,    Value *Version = Builder.getInt32(OMP_KERNEL_ARG_VERSION);    Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);    auto Int32Ty = Type::getInt32Ty(Builder.getContext()); -  constexpr const size_t MaxDim = 3; +  constexpr size_t MaxDim = 3;    Value *ZeroArray = Constant::getNullValue(ArrayType::get(Int32Ty, MaxDim));    Value *Flags = Builder.getInt64(KernelArgs.HasNoWait); diff --git a/llvm/lib/IR/ConstantsContext.h b/llvm/lib/IR/ConstantsContext.h index 51fb40b..e3e8d89 100644 --- a/llvm/lib/IR/ConstantsContext.h +++ b/llvm/lib/IR/ConstantsContext.h @@ -535,7 +535,7 @@ struct ConstantPtrAuthKeyType {    unsigned getHash() const { return hash_combine_range(Operands); } -  using TypeClass = typename ConstantInfo<ConstantPtrAuth>::TypeClass; +  using TypeClass = ConstantInfo<ConstantPtrAuth>::TypeClass;    ConstantPtrAuth *create(TypeClass *Ty) const {      return new ConstantPtrAuth(Operands[0], cast<ConstantInt>(Operands[1]), diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp index 62fd62c..3394754 100644 --- a/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -34,8 +34,6 @@ static cl::opt<bool> ImportConstantsWithRefs( 
     "import-constants-with-refs", cl::init(true), cl::Hidden,      cl::desc("Import constant global variables with references")); -constexpr uint32_t FunctionSummary::ParamAccess::RangeWidth; -  FunctionSummary FunctionSummary::ExternalNode =      FunctionSummary::makeDummyFunctionSummary(          SmallVector<FunctionSummary::EdgeTy, 0>()); @@ -88,8 +86,6 @@ std::pair<unsigned, unsigned> FunctionSummary::specialRefCounts() const {    return {RORefCnt, WORefCnt};  } -constexpr uint64_t ModuleSummaryIndex::BitcodeSummaryVersion; -  uint64_t ModuleSummaryIndex::getFlags() const {    uint64_t Flags = 0;    // Flags & 0x4 is reserved. DO NOT REUSE. diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index b618222..23be42f 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1076,63 +1076,59 @@ Expected<ArrayRef<SymbolResolution>>  LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,                  ArrayRef<SymbolResolution> Res) {    llvm::TimeTraceScope timeScope("LTO add thin LTO"); +  const auto BMID = BM.getModuleIdentifier();    ArrayRef<SymbolResolution> ResTmp = Res;    for (const InputFile::Symbol &Sym : Syms) {      assert(!ResTmp.empty());      const SymbolResolution &R = ResTmp.consume_front(); -    if (!Sym.getIRName().empty()) { +    if (!Sym.getIRName().empty() && R.Prevailing) {        auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(            GlobalValue::getGlobalIdentifier(Sym.getIRName(),                                             GlobalValue::ExternalLinkage, "")); -      if (R.Prevailing) -        ThinLTO.setPrevailingModuleForGUID(GUID, BM.getModuleIdentifier()); +      ThinLTO.setPrevailingModuleForGUID(GUID, BMID);      }    } -  if (Error Err = -          BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(), -                         [&](GlobalValue::GUID GUID) { -                           return ThinLTO.isPrevailingModuleForGUID( -                               GUID, 
BM.getModuleIdentifier()); -                         })) +  if (Error Err = BM.readSummary( +          ThinLTO.CombinedIndex, BMID, [&](GlobalValue::GUID GUID) { +            return ThinLTO.isPrevailingModuleForGUID(GUID, BMID); +          }))      return Err; -  LLVM_DEBUG(dbgs() << "Module " << BM.getModuleIdentifier() << "\n"); +  LLVM_DEBUG(dbgs() << "Module " << BMID << "\n");    for (const InputFile::Symbol &Sym : Syms) {      assert(!Res.empty());      const SymbolResolution &R = Res.consume_front(); -    if (!Sym.getIRName().empty()) { +    if (!Sym.getIRName().empty() && +        (R.Prevailing || R.FinalDefinitionInLinkageUnit)) {        auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(            GlobalValue::getGlobalIdentifier(Sym.getIRName(),                                             GlobalValue::ExternalLinkage, ""));        if (R.Prevailing) { -        assert( -            ThinLTO.isPrevailingModuleForGUID(GUID, BM.getModuleIdentifier())); +        assert(ThinLTO.isPrevailingModuleForGUID(GUID, BMID));          // For linker redefined symbols (via --wrap or --defsym) we want to          // switch the linkage to `weak` to prevent IPOs from happening.          // Find the summary in the module for this very GV and record the new          // linkage so that we can switch it when we import the GV.          if (R.LinkerRedefined) -          if (auto S = ThinLTO.CombinedIndex.findSummaryInModule( -                  GUID, BM.getModuleIdentifier())) +          if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(GUID, BMID))              S->setLinkage(GlobalValue::WeakAnyLinkage);        }        // If the linker resolved the symbol to a local definition then mark it        // as local in the summary for the module we are adding.        
if (R.FinalDefinitionInLinkageUnit) { -        if (auto S = ThinLTO.CombinedIndex.findSummaryInModule( -                GUID, BM.getModuleIdentifier())) { +        if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(GUID, BMID)) {            S->setDSOLocal(true);          }        }      }    } -  if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second) +  if (!ThinLTO.ModuleMap.insert({BMID, BM}).second)      return make_error<StringError>(          "Expected at most one ThinLTO module per bitcode file",          inconvertibleErrorCode()); @@ -1143,10 +1139,10 @@ LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,      // This is a fuzzy name matching where only modules with name containing the      // specified switch values are going to be compiled.      for (const std::string &Name : Conf.ThinLTOModulesToCompile) { -      if (BM.getModuleIdentifier().contains(Name)) { -        ThinLTO.ModulesToCompile->insert({BM.getModuleIdentifier(), BM}); -        LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BM.getModuleIdentifier() -                          << " to compile\n"); +      if (BMID.contains(Name)) { +        ThinLTO.ModulesToCompile->insert({BMID, BM}); +        LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BMID << " to compile\n"); +        break;        }      }    } diff --git a/llvm/lib/MC/GOFFObjectWriter.cpp b/llvm/lib/MC/GOFFObjectWriter.cpp index 71bd397..a3eaaa7 100644 --- a/llvm/lib/MC/GOFFObjectWriter.cpp +++ b/llvm/lib/MC/GOFFObjectWriter.cpp @@ -520,7 +520,7 @@ GOFFObjectWriter::GOFFObjectWriter(      std::unique_ptr<MCGOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)      : TargetObjectWriter(std::move(MOTW)), OS(OS) {} -GOFFObjectWriter::~GOFFObjectWriter() {} +GOFFObjectWriter::~GOFFObjectWriter() = default;  uint64_t GOFFObjectWriter::writeObject() {    uint64_t Size = GOFFWriter(OS, *Asm).writeObject(); diff --git a/llvm/lib/MC/MCDXContainerWriter.cpp b/llvm/lib/MC/MCDXContainerWriter.cpp index 
5eda039..ebed411 100644 --- a/llvm/lib/MC/MCDXContainerWriter.cpp +++ b/llvm/lib/MC/MCDXContainerWriter.cpp @@ -16,7 +16,7 @@  using namespace llvm; -MCDXContainerTargetWriter::~MCDXContainerTargetWriter() {} +MCDXContainerTargetWriter::~MCDXContainerTargetWriter() = default;  uint64_t DXContainerObjectWriter::writeObject() {    auto &Asm = *this->Asm; diff --git a/llvm/lib/MC/MCGOFFStreamer.cpp b/llvm/lib/MC/MCGOFFStreamer.cpp index 8b228db..ad6397b 100644 --- a/llvm/lib/MC/MCGOFFStreamer.cpp +++ b/llvm/lib/MC/MCGOFFStreamer.cpp @@ -20,7 +20,7 @@  using namespace llvm; -MCGOFFStreamer::~MCGOFFStreamer() {} +MCGOFFStreamer::~MCGOFFStreamer() = default;  GOFFObjectWriter &MCGOFFStreamer::getWriter() {    return static_cast<GOFFObjectWriter &>(getAssembler().getWriter()); diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.h b/llvm/lib/ObjCopy/COFF/COFFWriter.h index 66d7f01..3ee0e06 100644 --- a/llvm/lib/ObjCopy/COFF/COFFWriter.h +++ b/llvm/lib/ObjCopy/COFF/COFFWriter.h @@ -50,7 +50,7 @@ class COFFWriter {    Expected<uint32_t> virtualAddressToFileAddress(uint32_t RVA);  public: -  virtual ~COFFWriter() {} +  virtual ~COFFWriter() = default;    Error write();    COFFWriter(Object &Obj, raw_ostream &Out) diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h index 4f6473f..2783ef27 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObject.h +++ b/llvm/lib/ObjCopy/ELF/ELFObject.h @@ -134,7 +134,7 @@ private:    using Elf_Sym = typename ELFT::Sym;  public: -  ~ELFSectionWriter() override {} +  ~ELFSectionWriter() override = default;    Error visit(const SymbolTableSection &Sec) override;    Error visit(const RelocationSection &Sec) override;    Error visit(const GnuDebugLinkSection &Sec) override; @@ -180,7 +180,7 @@ public:  class BinarySectionWriter : public SectionWriter {  public: -  ~BinarySectionWriter() override {} +  ~BinarySectionWriter() override = default;    Error visit(const SymbolTableSection &Sec) override;    Error visit(const RelocationSection 
&Sec) override; @@ -346,7 +346,7 @@ private:    size_t totalSize() const;  public: -  ~ELFWriter() override {} +  ~ELFWriter() override = default;    bool WriteSectionHeaders;    // For --only-keep-debug, select an alternative section/segment layout @@ -367,7 +367,7 @@ private:    uint64_t TotalSize = 0;  public: -  ~BinaryWriter() override {} +  ~BinaryWriter() override = default;    Error finalize() override;    Error write() override;    BinaryWriter(Object &Obj, raw_ostream &Out, const CommonConfig &Config) @@ -784,7 +784,7 @@ private:    SymbolTableSection *Symbols = nullptr;  public: -  ~SectionIndexSection() override {} +  ~SectionIndexSection() override = default;    void addIndex(uint32_t Index) {      assert(Size > 0);      Indexes.push_back(Index); diff --git a/llvm/lib/ObjCopy/MachO/MachOReader.h b/llvm/lib/ObjCopy/MachO/MachOReader.h index e315e6fd..940ba4c 100644 --- a/llvm/lib/ObjCopy/MachO/MachOReader.h +++ b/llvm/lib/ObjCopy/MachO/MachOReader.h @@ -23,7 +23,7 @@ namespace macho {  // raw binaries and regular MachO object files.  
class Reader {  public: -  virtual ~Reader(){}; +  virtual ~Reader() = default;    virtual Expected<std::unique_ptr<Object>> create() const = 0;  }; diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h index 8620548..47639ad 100644 --- a/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h +++ b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h @@ -20,7 +20,7 @@ namespace xcoff {  class XCOFFWriter {  public: -  virtual ~XCOFFWriter() {} +  virtual ~XCOFFWriter() = default;    XCOFFWriter(Object &Obj, raw_ostream &Out) : Obj(Obj), Out(Out) {}    Error write(); diff --git a/llvm/lib/ObjectYAML/GOFFYAML.cpp b/llvm/lib/ObjectYAML/GOFFYAML.cpp index 60bc1f7..ecd7fb6 100644 --- a/llvm/lib/ObjectYAML/GOFFYAML.cpp +++ b/llvm/lib/ObjectYAML/GOFFYAML.cpp @@ -15,7 +15,7 @@  namespace llvm {  namespace GOFFYAML { -Object::Object() {} +Object::Object() = default;  } // namespace GOFFYAML diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 7290a86..6b7e980 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -537,7 +537,7 @@ void IRChangedPrinter::handleAfter(StringRef PassID, std::string &Name,    Out << "*** IR Dump After " << PassID << " on " << Name << " ***\n" << After;  } -IRChangedTester::~IRChangedTester() {} +IRChangedTester::~IRChangedTester() = default;  void IRChangedTester::registerCallbacks(PassInstrumentationCallbacks &PIC) {    if (TestChanged != "") @@ -1566,7 +1566,7 @@ void InLineChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {      TextChangeReporter<IRDataT<EmptyData>>::registerRequiredCallbacks(PIC);  } -TimeProfilingPassesHandler::TimeProfilingPassesHandler() {} +TimeProfilingPassesHandler::TimeProfilingPassesHandler() = default;  void TimeProfilingPassesHandler::registerCallbacks(      PassInstrumentationCallbacks &PIC) { diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp index 
fb6ff62..6f5d072 100644 --- a/llvm/lib/SandboxIR/Context.cpp +++ b/llvm/lib/SandboxIR/Context.cpp @@ -637,7 +637,7 @@ Context::Context(LLVMContext &LLVMCtx)      : LLVMCtx(LLVMCtx), IRTracker(*this),        LLVMIRBuilder(LLVMCtx, ConstantFolder()) {} -Context::~Context() {} +Context::~Context() = default;  void Context::clear() {    // TODO: Ideally we should clear only function-scope objects, and keep global diff --git a/llvm/lib/Support/BalancedPartitioning.cpp b/llvm/lib/Support/BalancedPartitioning.cpp index 1914f4c..d859abd 100644 --- a/llvm/lib/Support/BalancedPartitioning.cpp +++ b/llvm/lib/Support/BalancedPartitioning.cpp @@ -231,7 +231,7 @@ unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes,    }    // Compute move gains -  typedef std::pair<float, BPFunctionNode *> GainPair; +  using GainPair = std::pair<float, BPFunctionNode *>;    std::vector<GainPair> Gains;    for (auto &N : Nodes) {      bool FromLeftToRight = (N.Bucket == LeftBucket); diff --git a/llvm/lib/Support/BranchProbability.cpp b/llvm/lib/Support/BranchProbability.cpp index ea42f34..143e58a 100644 --- a/llvm/lib/Support/BranchProbability.cpp +++ b/llvm/lib/Support/BranchProbability.cpp @@ -20,8 +20,6 @@  using namespace llvm; -constexpr uint32_t BranchProbability::D; -  raw_ostream &BranchProbability::print(raw_ostream &OS) const {    if (isUnknown())      return OS << "?%"; diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 9491ec0..dab8bee 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -382,7 +382,7 @@ public:      RegisteredSubCommands.erase(sub);    } -  iterator_range<typename SmallPtrSet<SubCommand *, 4>::iterator> +  iterator_range<SmallPtrSet<SubCommand *, 4>::iterator>    getRegisteredSubcommands() {      return make_range(RegisteredSubCommands.begin(),                        RegisteredSubCommands.end()); @@ -2343,10 +2343,10 @@ namespace {  class HelpPrinter {  protected:    const 
bool ShowHidden; -  typedef SmallVector<std::pair<const char *, Option *>, 128> -      StrOptionPairVector; -  typedef SmallVector<std::pair<const char *, SubCommand *>, 128> -      StrSubCommandPairVector; +  using StrOptionPairVector = +      SmallVector<std::pair<const char *, Option *>, 128>; +  using StrSubCommandPairVector = +      SmallVector<std::pair<const char *, SubCommand *>, 128>;    // Print the options. Opts is assumed to be alphabetically sorted.    virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) {      for (const auto &Opt : Opts) @@ -2830,7 +2830,7 @@ StringMap<Option *> &cl::getRegisteredOptions(SubCommand &Sub) {    return Sub.OptionsMap;  } -iterator_range<typename SmallPtrSet<SubCommand *, 4>::iterator> +iterator_range<SmallPtrSet<SubCommand *, 4>::iterator>  cl::getRegisteredSubcommands() {    return GlobalParser->getRegisteredSubcommands();  } diff --git a/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/llvm/lib/Support/DAGDeltaAlgorithm.cpp index 98153647..3bfae14 100644 --- a/llvm/lib/Support/DAGDeltaAlgorithm.cpp +++ b/llvm/lib/Support/DAGDeltaAlgorithm.cpp @@ -47,16 +47,16 @@ class DAGDeltaAlgorithmImpl {    friend class DeltaActiveSetHelper;  public: -  typedef DAGDeltaAlgorithm::change_ty change_ty; -  typedef DAGDeltaAlgorithm::changeset_ty changeset_ty; -  typedef DAGDeltaAlgorithm::changesetlist_ty changesetlist_ty; -  typedef DAGDeltaAlgorithm::edge_ty edge_ty; +  using change_ty = DAGDeltaAlgorithm::change_ty; +  using changeset_ty = DAGDeltaAlgorithm::changeset_ty; +  using changesetlist_ty = DAGDeltaAlgorithm::changesetlist_ty; +  using edge_ty = DAGDeltaAlgorithm::edge_ty;  private: -  typedef std::vector<change_ty>::iterator pred_iterator_ty; -  typedef std::vector<change_ty>::iterator succ_iterator_ty; -  typedef std::set<change_ty>::iterator pred_closure_iterator_ty; -  typedef std::set<change_ty>::iterator succ_closure_iterator_ty; +  using pred_iterator_ty = std::vector<change_ty>::iterator; +  using 
succ_iterator_ty = std::vector<change_ty>::iterator; +  using pred_closure_iterator_ty = std::set<change_ty>::iterator; +  using succ_closure_iterator_ty = std::set<change_ty>::iterator;    DAGDeltaAlgorithm &DDA; diff --git a/llvm/lib/Support/DynamicLibrary.cpp b/llvm/lib/Support/DynamicLibrary.cpp index f1c15c0..61566d3 100644 --- a/llvm/lib/Support/DynamicLibrary.cpp +++ b/llvm/lib/Support/DynamicLibrary.cpp @@ -23,7 +23,7 @@ using namespace llvm::sys;  // All methods for HandleSet should be used holding SymbolsMutex.  class DynamicLibrary::HandleSet { -  typedef std::vector<void *> HandleList; +  using HandleList = std::vector<void *>;    HandleList Handles;    void *Process = &Invalid; diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp index b6a2f8a..2e8fba8 100644 --- a/llvm/lib/Support/StringRef.cpp +++ b/llvm/lib/Support/StringRef.cpp @@ -17,11 +17,6 @@  using namespace llvm; -// MSVC emits references to this into the translation units which reference it. -#ifndef _MSC_VER -constexpr size_t StringRef::npos; -#endif -  // strncasecmp() is not available on non-POSIX systems, so define an  // alternative function here.  
static int ascii_strncasecmp(StringRef LHS, StringRef RHS) { diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp index 9d45096..b08f508 100644 --- a/llvm/lib/Support/Timer.cpp +++ b/llvm/lib/Support/Timer.cpp @@ -207,7 +207,7 @@ void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {  namespace { -typedef StringMap<Timer> Name2TimerMap; +using Name2TimerMap = StringMap<Timer>;  class Name2PairMap {    StringMap<std::pair<TimerGroup*, Name2TimerMap> > Map; diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp index 6f8e091..8f0d24e 100644 --- a/llvm/lib/Support/UnicodeNameToCodepoint.cpp +++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp @@ -251,10 +251,10 @@ constexpr const char *const HangulSyllables[][3] = {  // Unicode 15.0  // 3.12 Conjoining Jamo Behavior Common constants -constexpr const char32_t SBase = 0xAC00; -constexpr const uint32_t LCount = 19; -constexpr const uint32_t VCount = 21; -constexpr const uint32_t TCount = 28; +constexpr char32_t SBase = 0xAC00; +constexpr uint32_t LCount = 19; +constexpr uint32_t VCount = 21; +constexpr uint32_t TCount = 28;  static std::size_t findSyllable(StringRef Name, bool Strict,                                  char &PreviousInName, int &Pos, int Column) { diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp index 7e03b97..45b7120 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp @@ -370,6 +370,22 @@ SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {            {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};  } +SVEStackAllocations AArch64PrologueEpilogueCommon::getSVEStackAllocations( +    SVEFrameSizes const &SVE) { +  StackOffset AfterZPRs = SVE.ZPR.LocalsSize; +  StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize; +  StackOffset AfterPPRs = 
{}; +  if (SVELayout == SVEStackLayout::Split) { +    BeforePPRs = SVE.PPR.CalleeSavesSize; +    // If there are no ZPR CSRs, place all local allocations after the ZPRs. +    if (SVE.ZPR.CalleeSavesSize) +      AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize; +    else +      AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals. +  } +  return {BeforePPRs, AfterPPRs, AfterZPRs}; +} +  struct SVEPartitions {    struct {      MachineBasicBlock::iterator Begin, End; @@ -687,16 +703,19 @@ void AArch64PrologueEmitter::emitPrologue() {    // All of the remaining stack allocations are for locals.    determineLocalsStackSize(NumBytes, PrologueSaveSize); +  auto [PPR, ZPR] = getSVEStackFrameSizes(); +  SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR}); +    MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;    if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { +    assert(!SVEAllocs.AfterPPRs && +           "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");      // If we're doing SVE saves first, we need to immediately allocate space      // for fixed objects, then space for the SVE callee saves.      //      // Windows unwind requires that the scalable size is a multiple of 16;      // that's handled when the callee-saved size is computed. 
-    auto SaveSize = -        StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) + -        StackOffset::getFixed(FixedObject); +    auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);      allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},                         /*FollowupAllocs=*/true);      NumBytes -= FixedObject; @@ -764,12 +783,11 @@ void AArch64PrologueEmitter::emitPrologue() {    if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))      emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding); -  auto [PPR, ZPR] = getSVEStackFrameSizes(); -  StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;    StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes); +  SVEAllocs.AfterZPRs += NonSVELocalsSize; +    StackOffset CFAOffset =        StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize; -    MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;    // Allocate space for the callee saves and PPR locals (if any).    
if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) { @@ -780,31 +798,23 @@ void AArch64PrologueEmitter::emitPrologue() {      if (EmitAsyncCFI)        emitCalleeSavedSVELocations(AfterSVESavesI); -    StackOffset AllocateBeforePPRs = SVECalleeSavesSize; -    StackOffset AllocateAfterPPRs = PPR.LocalsSize; -    if (SVELayout == SVEStackLayout::Split) { -      AllocateBeforePPRs = PPR.CalleeSavesSize; -      AllocateAfterPPRs = PPR.LocalsSize + ZPR.CalleeSavesSize; -    } -    allocateStackSpace(PPRRange.Begin, 0, AllocateBeforePPRs, +    allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,                         EmitAsyncCFI && !HasFP, CFAOffset, -                       MFI.hasVarSizedObjects() || AllocateAfterPPRs || -                           ZPR.LocalsSize || NonSVELocalsSize); -    CFAOffset += AllocateBeforePPRs; +                       MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs || +                           SVEAllocs.AfterZPRs); +    CFAOffset += SVEAllocs.BeforePPRs;      assert(PPRRange.End == ZPRRange.Begin &&             "Expected ZPR callee saves after PPR locals"); -    allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs, +    allocateStackSpace(PPRRange.End, RealignmentPadding, SVEAllocs.AfterPPRs,                         EmitAsyncCFI && !HasFP, CFAOffset, -                       MFI.hasVarSizedObjects() || ZPR.LocalsSize || -                           NonSVELocalsSize); -    CFAOffset += AllocateAfterPPRs; +                       MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs); +    CFAOffset += SVEAllocs.AfterPPRs;    } else {      assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord); -    // Note: With CalleeSavesAboveFrameRecord, the SVE CS have already been -    // allocated (and separate PPR locals are not supported, all SVE locals, -    // both PPR and ZPR, are within the ZPR locals area). 
-    assert(!PPR.LocalsSize && "Unexpected PPR locals!"); -    CFAOffset += SVECalleeSavesSize; +    // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have +    // already been allocated. PPR locals (included in AfterPPRs) are not +    // supported (note: this is asserted above). +    CFAOffset += SVEAllocs.BeforePPRs;    }    // Allocate space for the rest of the frame including ZPR locals. Align the @@ -815,9 +825,9 @@ void AArch64PrologueEmitter::emitPrologue() {      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the      // correct value here, as NumBytes also includes padding bytes, which      // shouldn't be counted here. -    allocateStackSpace( -        AfterSVESavesI, RealignmentPadding, ZPR.LocalsSize + NonSVELocalsSize, -        EmitAsyncCFI && !HasFP, CFAOffset, MFI.hasVarSizedObjects()); +    allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs, +                       EmitAsyncCFI && !HasFP, CFAOffset, +                       MFI.hasVarSizedObjects());    }    // If we need a base pointer, set it up here. It's whatever the value of the @@ -1472,27 +1482,26 @@ void AArch64EpilogueEmitter::emitEpilogue() {    assert(NumBytes >= 0 && "Negative stack allocation size!?");    StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize; -  StackOffset SVEStackSize = -      SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize; +  SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});    MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin; -  MachineBasicBlock::iterator RestoreEnd = PPRRange.End;    // Deallocate the SVE area.    
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { -    StackOffset SVELocalsSize = ZPR.LocalsSize + PPR.LocalsSize; +    assert(!SVEAllocs.AfterPPRs && +           "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");      // If the callee-save area is before FP, restoring the FP implicitly -    // deallocates non-callee-save SVE allocations.  Otherwise, deallocate them +    // deallocates non-callee-save SVE allocations. Otherwise, deallocate them      // explicitly.      if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {        emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP, -                      SVELocalsSize, TII, MachineInstr::FrameDestroy, false, -                      NeedsWinCFI, &HasWinCFI); +                      SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy, +                      false, NeedsWinCFI, &HasWinCFI);      }      // Deallocate callee-save SVE registers. -    emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, -                    SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false, -                    NeedsWinCFI, &HasWinCFI); +    emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, +                    SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy, +                    false, NeedsWinCFI, &HasWinCFI);    } else if (AFI->hasSVEStackSize()) {      // If we have stack realignment or variable-sized objects we must use the FP      // to restore SVE callee saves (as there is an unknown amount of @@ -1524,46 +1533,33 @@ void AArch64EpilogueEmitter::emitEpilogue() {        emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,                        -SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);      } else if (BaseForSVEDealloc == AArch64::SP) { -      auto CFAOffset = -          SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize); - -      if (SVECalleeSavesSize) { -        // Deallocate the non-SVE locals 
first before we can deallocate (and -        // restore callee saves) from the SVE area. -        auto NonSVELocals = StackOffset::getFixed(NumBytes); -        emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, -                        NonSVELocals, TII, MachineInstr::FrameDestroy, false, -                        NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset); -        CFAOffset -= NonSVELocals; -        NumBytes = 0; -      } - -      if (ZPR.LocalsSize) { -        emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, -                        ZPR.LocalsSize, TII, MachineInstr::FrameDestroy, false, -                        NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset); -        CFAOffset -= ZPR.LocalsSize; +      auto NonSVELocals = StackOffset::getFixed(NumBytes); +      auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) + +                       SVEAllocs.totalSize(); + +      if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) { +        // Deallocate non-SVE locals now. This is needed to reach the SVE callee +        // saves, but may also allow combining stack hazard bumps for split SVE. 
+        SVEAllocs.AfterZPRs += NonSVELocals; +        NumBytes -= NonSVELocals.getFixed();        } - -      StackOffset SVECalleeSavesToDealloc = SVECalleeSavesSize; -      if (SVELayout == SVEStackLayout::Split && -          (PPR.LocalsSize || ZPR.CalleeSavesSize)) { -        assert(PPRRange.Begin == ZPRRange.End && -               "Expected PPR restores after ZPR"); -        emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP, -                        PPR.LocalsSize + ZPR.CalleeSavesSize, TII, -                        MachineInstr::FrameDestroy, false, NeedsWinCFI, -                        &HasWinCFI, EmitCFI && !HasFP, CFAOffset); -        CFAOffset -= PPR.LocalsSize + ZPR.CalleeSavesSize; -        SVECalleeSavesToDealloc -= ZPR.CalleeSavesSize; -      } - -      // If split SVE is on, this dealloc PPRs, otherwise, deallocs ZPRs + PPRs: -      if (SVECalleeSavesToDealloc) -        emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, -                        SVECalleeSavesToDealloc, TII, -                        MachineInstr::FrameDestroy, false, NeedsWinCFI, -                        &HasWinCFI, EmitCFI && !HasFP, CFAOffset); +      // To deallocate the SVE stack adjust by the allocations in reverse. 
+      emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, +                      SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy, +                      false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, +                      CFAOffset); +      CFAOffset -= SVEAllocs.AfterZPRs; +      assert(PPRRange.Begin == ZPRRange.End && +             "Expected PPR restores after ZPR"); +      emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP, +                      SVEAllocs.AfterPPRs, TII, MachineInstr::FrameDestroy, +                      false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, +                      CFAOffset); +      CFAOffset -= SVEAllocs.AfterPPRs; +      emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, +                      SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy, +                      false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, +                      CFAOffset);      }      if (EmitCFI) diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h index bccadda..6e0e283 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h @@ -33,6 +33,11 @@ struct SVEFrameSizes {    } PPR, ZPR;  }; +struct SVEStackAllocations { +  StackOffset BeforePPRs, AfterPPRs, AfterZPRs; +  StackOffset totalSize() const { return BeforePPRs + AfterPPRs + AfterZPRs; } +}; +  class AArch64PrologueEpilogueCommon {  public:    AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB, @@ -66,6 +71,7 @@ protected:    bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;    SVEFrameSizes getSVEStackFrameSizes() const; +  SVEStackAllocations getSVEStackAllocations(SVEFrameSizes const &);    MachineFunction &MF;    MachineBasicBlock &MBB; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 
655e818..5b5565a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2227,7 +2227,7 @@ static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,    return std::nullopt;  } -template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc> +template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>  static std::optional<Instruction *>  instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II,                                    bool MergeIntoAddendOp) { @@ -6657,10 +6657,15 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(            Ops.push_back(&Ext->getOperandUse(0));          Ops.push_back(&Op); -        if (isa<SExtInst>(Ext)) +        if (isa<SExtInst>(Ext)) {            NumSExts++; -        else +        } else {            NumZExts++; +          // A zext(a) is also a sext(zext(a)), if we take more than 2 steps. +          if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 < +              I->getType()->getScalarSizeInBits()) +            NumSExts++; +        }          continue;        } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index cd8b249..67042b7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -69,7 +69,7 @@ FunctionPass *createAMDGPUPreloadKernArgPrologLegacyPass();  ModulePass *createAMDGPUPreloadKernelArgumentsLegacyPass(const TargetMachine *);  struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { -  AMDGPUSimplifyLibCallsPass() {} +  AMDGPUSimplifyLibCallsPass() = default;    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);  }; @@ -371,13 +371,13 @@ public:  class AMDGPUAnnotateUniformValuesPass      : public PassInfoMixin<AMDGPUAnnotateUniformValuesPass> {  public: -  AMDGPUAnnotateUniformValuesPass() {} +  AMDGPUAnnotateUniformValuesPass() = default;    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 
 };  class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {  public: -  SIModeRegisterPass() {} +  SIModeRegisterPass() = default;    PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM);  }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index 1064e57..dad94b8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -96,7 +96,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {  }  struct KernArgPreloadDescriptor : public ArgDescriptor { -  KernArgPreloadDescriptor() {} +  KernArgPreloadDescriptor() = default;    SmallVector<MCRegister> Regs;  }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 9907c88f..8669978 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1555,7 +1555,7 @@ private:    AMDGPU::ClusterDimsAttr Attr; -  static constexpr const char AttrName[] = "amdgpu-cluster-dims"; +  static constexpr char AttrName[] = "amdgpu-cluster-dims";  };  AAAMDGPUClusterDims & diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h index cf2ab825..a3be0f5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h @@ -48,7 +48,7 @@ private:    FuncInfoMap FIM;  public: -  AMDGPUPerfHintAnalysis() {} +  AMDGPUPerfHintAnalysis() = default;    // OldPM    bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 103cdec..1e5885a2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -202,7 +202,7 @@ bool PredicateMapping::match(const MachineInstr 
&MI,    return true;  } -SetOfRulesForOpcode::SetOfRulesForOpcode() {} +SetOfRulesForOpcode::SetOfRulesForOpcode() = default;  SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)      : FastTypes(FastTypes) {} @@ -913,6 +913,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,    addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}}); +  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}}); +    addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)        .Uni(S64, {{Sgpr64}, {}}); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp index 733c5d5..fe81a5e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -181,14 +181,52 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(    return NewRetBlock;  } +static BasicBlock * +createDummyReturnBlock(Function &F, +                       SmallVector<BasicBlock *, 4> &ReturningBlocks) { +  BasicBlock *DummyReturnBB = +      BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F); +  Type *RetTy = F.getReturnType(); +  Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy); +  ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB); +  ReturningBlocks.push_back(DummyReturnBB); +  return DummyReturnBB; +} + +/// Handle conditional branch instructions (-> 2 targets) and callbr +/// instructions with N targets. +static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI, +                          BasicBlock *DummyReturnBB, +                          std::vector<DominatorTree::UpdateType> &Updates) { +  SmallVector<BasicBlock *, 2> Successors(successors(BB)); + +  // Create a new transition block to hold the conditional branch. 
+  BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock"); + +  Updates.reserve(Updates.size() + 2 * Successors.size() + 2); + +  // 'Successors' become successors of TransitionBB instead of BB, +  // and TransitionBB becomes a single successor of BB. +  Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB); +  for (BasicBlock *Successor : Successors) { +    Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor); +    Updates.emplace_back(DominatorTree::Delete, BB, Successor); +  } + +  // Create a branch that will always branch to the transition block and +  // references DummyReturnBB. +  BB->getTerminator()->eraseFromParent(); +  BranchInst::Create(TransitionBB, DummyReturnBB, +                     ConstantInt::getTrue(F.getContext()), BB); +  Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB); +} +  bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,                                              const PostDominatorTree &PDT,                                              const UniformityInfo &UA) { -  assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator."); -    if (PDT.root_size() == 0 ||        (PDT.root_size() == 1 && -       !isa<BranchInst>(PDT.getRoot()->getTerminator()))) +       !isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))      return false;    // Loop over all of the blocks in a function, tracking all of the blocks that @@ -222,46 +260,28 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,        if (HasDivergentExitBlock)          UnreachableBlocks.push_back(BB);      } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { - -      ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext()); -      if (DummyReturnBB == nullptr) { -        DummyReturnBB = BasicBlock::Create(F.getContext(), -                                           "DummyReturnBlock", &F); -        Type *RetTy = F.getReturnType(); -        
Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy); -        ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB); -        ReturningBlocks.push_back(DummyReturnBB); -      } +      if (!DummyReturnBB) +        DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);        if (BI->isUnconditional()) {          BasicBlock *LoopHeaderBB = BI->getSuccessor(0);          BI->eraseFromParent(); // Delete the unconditional branch.          // Add a new conditional branch with a dummy edge to the return block. -        BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB); -        Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB); -      } else { // Conditional branch. -        SmallVector<BasicBlock *, 2> Successors(successors(BB)); - -        // Create a new transition block to hold the conditional branch. -        BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock"); - -        Updates.reserve(Updates.size() + 2 * Successors.size() + 2); - -        // 'Successors' become successors of TransitionBB instead of BB, -        // and TransitionBB becomes a single successor of BB. -        Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB); -        for (BasicBlock *Successor : Successors) { -          Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor); -          Updates.emplace_back(DominatorTree::Delete, BB, Successor); -        } - -        // Create a branch that will always branch to the transition block and -        // references DummyReturnBB. 
-        BB->getTerminator()->eraseFromParent(); -        BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB); +        BranchInst::Create(LoopHeaderBB, DummyReturnBB, +                           ConstantInt::getTrue(F.getContext()), BB);          Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB); +      } else { +        handleNBranch(F, BB, BI, DummyReturnBB, Updates);        }        Changed = true; +    } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) { +      if (!DummyReturnBB) +        DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks); + +      handleNBranch(F, BB, CBI, DummyReturnBB, Updates); +      Changed = true; +    } else { +      llvm_unreachable("unsupported block terminator");      }    } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp index 61c5dcd..ded2f5a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp @@ -54,7 +54,7 @@ public:    bool CullSGPRHazardsAtMemWait;    unsigned CullSGPRHazardsMemWaitThreshold; -  AMDGPUWaitSGPRHazards() {} +  AMDGPUWaitSGPRHazards() = default;    // Return the numeric ID 0-127 for a given SGPR.    
static std::optional<unsigned> sgprNumber(Register Reg, diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 975781f..f357981 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -183,7 +183,7 @@ class ScheduleMetrics {    unsigned BubbleCycles;  public: -  ScheduleMetrics() {} +  ScheduleMetrics() = default;    ScheduleMetrics(unsigned L, unsigned BC)        : ScheduleLength(L), BubbleCycles(BC) {}    unsigned getLength() const { return ScheduleLength; } @@ -217,7 +217,7 @@ class RegionPressureMap {    bool IsLiveOut;  public: -  RegionPressureMap() {} +  RegionPressureMap() = default;    RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)        : DAG(GCNDAG), IsLiveOut(LiveOut) {}    // Build the Instr->LiveReg and RegionIdx->Instr maps diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d9f76c9..45f5919 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6153,7 +6153,7 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,    // information.    
if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) &&        MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) { -    constexpr const AMDGPU::OpName OpNames[] = { +    constexpr AMDGPU::OpName OpNames[] = {          AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};      for (auto [I, OpName] : enumerate(OpNames)) { @@ -6215,8 +6215,8 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,  bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand(      const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,      const MachineOperand *MO) const { -  constexpr const unsigned NumOps = 3; -  constexpr const AMDGPU::OpName OpNames[NumOps * 2] = { +  constexpr unsigned NumOps = 3; +  constexpr AMDGPU::OpName OpNames[NumOps * 2] = {        AMDGPU::OpName::src0,           AMDGPU::OpName::src1,        AMDGPU::OpName::src2,           AMDGPU::OpName::src0_modifiers,        AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers}; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 6b06534..3a00267 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -9869,32 +9869,12 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {    assert(Subtarget->isTargetDarwin());    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); -  auto PtrVT = getPointerTy(DAG.getDataLayout()); - -  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();    // Pair of floats / doubles used to pass the result.    Type *RetTy = StructType::get(ArgTy, ArgTy);    auto &DL = DAG.getDataLayout();    ArgListTy Args; -  bool ShouldUseSRet = getTM().isAPCS_ABI(); -  SDValue SRet; -  if (ShouldUseSRet) { -    // Create stack object for sret. 
-    const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); -    const Align StackAlign = DL.getPrefTypeAlign(RetTy); -    int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); -    SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL)); - -    ArgListEntry Entry(SRet, PointerType::getUnqual(RetTy->getContext())); -    Entry.IsSExt = false; -    Entry.IsZExt = false; -    Entry.IsSRet = true; -    Args.push_back(Entry); -    RetTy = Type::getVoidTy(*DAG.getContext()); -  } -    Args.emplace_back(Arg, ArgTy);    StringRef LibcallName = getLibcallImplName(SincosStret); @@ -9904,25 +9884,10 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {    TargetLowering::CallLoweringInfo CLI(DAG);    CLI.setDebugLoc(dl)        .setChain(DAG.getEntryNode()) -      .setCallee(CC, RetTy, Callee, std::move(Args)) -      .setDiscardResult(ShouldUseSRet); +      .setCallee(CC, RetTy, Callee, std::move(Args));    std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); -  if (!ShouldUseSRet) -    return CallResult.first; - -  SDValue LoadSin = -      DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo()); - -  // Address of cos field. 
-  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet, -                            DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl)); -  SDValue LoadCos = -      DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo()); - -  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); -  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, -                     LoadSin.getValue(0), LoadCos.getValue(0)); +  return CallResult.first;  }  SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp index 77dc4a7..b2a8204 100644 --- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp @@ -88,6 +88,16 @@ bool BPFAsmPrinter::doFinalization(Module &M) {      }    } +  for (GlobalObject &GO : M.global_objects()) { +    if (!GO.hasExternalWeakLinkage()) +      continue; + +    if (!SawTrapCall && GO.getName() == BPF_TRAP) { +      GO.eraseFromParent(); +      break; +    } +  } +    return AsmPrinter::doFinalization(M);  } @@ -160,6 +170,20 @@ bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,  }  void BPFAsmPrinter::emitInstruction(const MachineInstr *MI) { +  if (MI->isCall()) { +    for (const MachineOperand &Op : MI->operands()) { +      if (Op.isGlobal()) { +        if (const GlobalValue *GV = Op.getGlobal()) +          if (GV->getName() == BPF_TRAP) +            SawTrapCall = true; +      } else if (Op.isSymbol()) { +        if (const MCSymbol *Sym = Op.getMCSymbol()) +          if (Sym->getName() == BPF_TRAP) +            SawTrapCall = true; +      } +    } +  } +    BPF_MC::verifyInstructionPredicates(MI->getOpcode(),                                        getSubtargetInfo().getFeatureBits()); diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.h b/llvm/lib/Target/BPF/BPFAsmPrinter.h index 90ef207..75a1d7e 100644 --- a/llvm/lib/Target/BPF/BPFAsmPrinter.h +++ b/llvm/lib/Target/BPF/BPFAsmPrinter.h @@ -39,6 +39,7 @@ public:  
private:    BTFDebug *BTF;    TargetMachine &TM; +  bool SawTrapCall = false;    const BPFTargetMachine &getBTM() const;  }; diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index 8ace2d2..eb4c884 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -194,9 +194,10 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) {          dxbc::PSV::v2::ResourceBindInfo BindInfo;          BindInfo.Type = Type;          BindInfo.LowerBound = Binding.LowerBound; -        assert(Binding.Size == UINT32_MAX || -               (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX && -                   "Resource range is too large"); +        assert( +            (Binding.Size == UINT32_MAX || +             (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX) && +            "Resource range is too large");          BindInfo.UpperBound = (Binding.Size == UINT32_MAX)                                    ? 
UINT32_MAX                                    : Binding.LowerBound + Binding.Size - 1; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 7ee280d..eadf020 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -1815,7 +1815,7 @@ struct WeightedLeaf {    int Weight;    int InsertionOrder; -  WeightedLeaf() {} +  WeightedLeaf() = default;    WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :      Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 904aabed..fe700e1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -375,6 +375,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,        setOperationAction(ISD::FFLOOR, VT, Legal);        setOperationAction(ISD::FTRUNC, VT, Legal);        setOperationAction(ISD::FROUNDEVEN, VT, Legal); +      setOperationAction(ISD::FMINNUM, VT, Legal); +      setOperationAction(ISD::FMAXNUM, VT, Legal);      }      setOperationAction(ISD::CTPOP, GRLenVT, Legal);      setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal); @@ -461,6 +463,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,        setOperationAction(ISD::FFLOOR, VT, Legal);        setOperationAction(ISD::FTRUNC, VT, Legal);        setOperationAction(ISD::FROUNDEVEN, VT, Legal); +      setOperationAction(ISD::FMINNUM, VT, Legal); +      setOperationAction(ISD::FMAXNUM, VT, Legal);      }    } diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 610ba05..b502b056 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1558,6 
+1558,10 @@ defm : PatXrXrF<fmul, "XVFMUL">;  // XVFDIV_{S/D}  defm : PatXrXrF<fdiv, "XVFDIV">; +// XVFMAX_{S/D}, XVFMIN_{S/D} +defm : PatXrXrF<fmaxnum, "XVFMAX">; +defm : PatXrXrF<fminnum, "XVFMIN">; +  // XVFMADD_{S/D}  def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),            (XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 6470842..6b74a4b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1760,6 +1760,10 @@ defm : PatVrVrF<fmul, "VFMUL">;  // VFDIV_{S/D}  defm : PatVrVrF<fdiv, "VFDIV">; +// VFMAX_{S/D}, VFMIN_{S/D} +defm : PatVrVrF<fmaxnum, "VFMAX">; +defm : PatVrVrF<fminnum, "VFMIN">; +  // VFMADD_{S/D}  def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va),            (VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp index 7d54565..6d69af5 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp @@ -39,7 +39,7 @@ LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)      : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,                                /*HasRelocationAddend=*/true) {} -LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {} +LoongArchELFObjectWriter::~LoongArchELFObjectWriter() = default;  unsigned LoongArchELFObjectWriter::getRelocType(const MCFixup &Fixup,                                                  const MCValue &Target, diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index f0e2bc4..08fa51d 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ 
b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -38,7 +38,7 @@ public:    LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)        : Ctx(ctx), MCII(MCII) {} -  ~LoongArchMCCodeEmitter() override {} +  ~LoongArchMCCodeEmitter() override = default;    void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,                           SmallVectorImpl<MCFixup> &Fixups, diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h index caef8fe7..b832b82 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h +++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h @@ -20,7 +20,7 @@ class MemoryLocation;  class NVPTXAAResult : public AAResultBase {  public: -  NVPTXAAResult() {} +  NVPTXAAResult() = default;    NVPTXAAResult(NVPTXAAResult &&Arg) : AAResultBase(std::move(Arg)) {}    /// Handle invalidation events from the new pass manager. diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index c667a09..996d653 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1836,7 +1836,7 @@ bool NVPTXDAGToDAGISel::tryFence(SDNode *N) {    return true;  } -NVPTXScopes::NVPTXScopes(LLVMContext &C) { +NVPTXScopes::NVPTXScopes(LLVMContext &C) : Context(&C) {    Scopes[C.getOrInsertSyncScopeID("singlethread")] = NVPTX::Scope::Thread;    Scopes[C.getOrInsertSyncScopeID("")] = NVPTX::Scope::System;    Scopes[C.getOrInsertSyncScopeID("block")] = NVPTX::Scope::Block; @@ -1851,11 +1851,21 @@ NVPTX::Scope NVPTXScopes::operator[](SyncScope::ID ID) const {    auto S = Scopes.find(ID);    if (S == Scopes.end()) { -    // TODO: -    // - Add API to LLVMContext to get the name of a single scope. -    // - Use that API here to print an error containing the name -    //   of this Unknown ID. 
-    report_fatal_error(formatv("Could not find scope ID={}.", int(ID))); +    auto scopeName = Context->getSyncScopeName(ID); +    assert(scopeName.has_value() && "Scope name must exist."); + +    // Build list of supported syncscopes programmatically +    SmallVector<StringRef> supportedScopes; +    for (const auto &Entry : Scopes) { +      if (auto name = Context->getSyncScopeName(Entry.first)) +        supportedScopes.push_back(name->empty() ? "<empty string>" : *name); +    } + +    reportFatalUsageError( +        formatv("NVPTX backend does not support syncscope \"{0}\" (ID={1}).\n" +                "Supported syncscopes are: {2}.", +                scopeName.value(), int(ID), +                make_range(supportedScopes.begin(), supportedScopes.end())));    }    return S->second;  } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 1cb579b..d525531 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -35,6 +35,7 @@ struct NVPTXScopes {  private:    SmallMapVector<SyncScope::ID, NVPTX::Scope, 8> Scopes{}; +  LLVMContext *Context = nullptr;  };  class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index da3efdc..0c2e44e 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -360,6 +360,10 @@ let Predicates = [HasVSX, IsISAFuture] in {      def LXVPRLL : XForm_XTp5_RAB5<31, 621, (outs vsrprc:$XTp),                                    (ins (memr $RA):$addr, g8rc:$RB),                                    "lxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>; +    def LXVPB32X +        : XForm_XTp5_RAB5<31, 877, (outs vsrprc:$XTp), +                          (ins (memr $RA):$addr, g8rc:$RB), +                          "lxvpb32x $XTp, $addr, $RB", IIC_LdStLFD, []>;    }    let mayStore = 1 in { @@ 
-376,6 +380,10 @@ let Predicates = [HasVSX, IsISAFuture] in {          : XForm_XTp5_RAB5<31, 749, (outs),                            (ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB),                            "stxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>; +    def STXVPB32X +        : XForm_XTp5_RAB5<31, 1005, (outs), +                          (ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB), +                          "stxvpb32x $XTp, $addr, $RB", IIC_LdStLFD, []>;    }    def VUPKHSNTOB : VXForm_VRTB5<387, 0, (outs vrrc:$VRT), (ins vrrc:$VRB), diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index e75dfe3..5b8cfb2 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -407,7 +407,6 @@ enum OperandType : unsigned {    OPERAND_SIMM5_PLUS1,    OPERAND_SIMM6,    OPERAND_SIMM6_NONZERO, -  OPERAND_SIMM8,    OPERAND_SIMM8_UNSIGNED,    OPERAND_SIMM10,    OPERAND_SIMM10_LSB0000_NONZERO, diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index b25a054..9078335 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -371,8 +371,8 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,    RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);    unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());    if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { -    report_fatal_error("The V extension does not support EEW=64 for index " -                       "values when XLEN=32"); +    reportFatalUsageError("The V extension does not support EEW=64 for index " +                          "values when XLEN=32");    }    const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(        NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), @@ -444,8 +444,8 @@ void 
RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,    RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);    unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());    if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { -    report_fatal_error("The V extension does not support EEW=64 for index " -                       "values when XLEN=32"); +    reportFatalUsageError("The V extension does not support EEW=64 for index " +                          "values when XLEN=32");    }    const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(        NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), @@ -2223,8 +2223,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {        RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);        unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());        if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { -        report_fatal_error("The V extension does not support EEW=64 for index " -                           "values when XLEN=32"); +        reportFatalUsageError("The V extension does not support EEW=64 for " +                              "index values when XLEN=32");        }        const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(            IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), @@ -2457,8 +2457,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {        RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);        unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());        if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { -        report_fatal_error("The V extension does not support EEW=64 for index " -                           "values when XLEN=32"); +        reportFatalUsageError("The V extension does not support EEW=64 for " +                              "index values when XLEN=32");        }        const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(            IsMasked, IsOrdered, 
IndexLog2EEW, diff --git a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp index a1c8e23..c58a5c0 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp @@ -48,7 +48,7 @@ class VXRMInfo {    } State = Uninitialized;  public: -  VXRMInfo() {} +  VXRMInfo() = default;    static VXRMInfo getUnknown() {      VXRMInfo Info; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index c31713e..1c6a5af 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -90,6 +90,7 @@ defvar ZfhminDExts = [ZfhminDExt, ZhinxminZdinxExt, ZhinxminZdinx32Ext];  //===----------------------------------------------------------------------===//  let Predicates = [HasHalfFPLoadStoreMove] in { +let canFoldAsLoad = 1 in  def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;  // Operands for stores are in the order srcreg, base, offset rather than diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp index 0a318e0..ed6d355 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp @@ -15,4 +15,4 @@  using namespace llvm;  SPIRVTargetStreamer::SPIRVTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} -SPIRVTargetStreamer::~SPIRVTargetStreamer() {} +SPIRVTargetStreamer::~SPIRVTargetStreamer() = default; diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 9e11c3a..dd57b74 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -149,23 +149,23 @@ static FunctionType *getOriginalFunctionType(const Function &F) {          return isa<MDString>(N->getOperand(0)) &&                 cast<MDString>(N->getOperand(0))->getString() == 
F.getName();        }); -  // TODO: probably one function can have numerous type mutations, -  // so we should support this.    if (ThisFuncMDIt != NamedMD->op_end()) {      auto *ThisFuncMD = *ThisFuncMDIt; -    MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(1)); -    assert(MD && "MDNode operand is expected"); -    ConstantInt *Const = getConstInt(MD, 0); -    if (Const) { -      auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1)); -      assert(CMeta && "ConstantAsMetadata operand is expected"); -      assert(Const->getSExtValue() >= -1); -      // Currently -1 indicates return value, greater values mean -      // argument numbers. -      if (Const->getSExtValue() == -1) -        RetTy = CMeta->getType(); -      else -        ArgTypes[Const->getSExtValue()] = CMeta->getType(); +    for (unsigned I = 1; I != ThisFuncMD->getNumOperands(); ++I) { +      MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(I)); +      assert(MD && "MDNode operand is expected"); +      ConstantInt *Const = getConstInt(MD, 0); +      if (Const) { +        auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1)); +        assert(CMeta && "ConstantAsMetadata operand is expected"); +        assert(Const->getSExtValue() >= -1); +        // Currently -1 indicates return value, greater values mean +        // argument numbers. 
+        if (Const->getSExtValue() == -1) +          RetTy = CMeta->getType(); +        else +          ArgTypes[Const->getSExtValue()] = CMeta->getType(); +      }      }    } diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index 2d19f6de..44b6c66 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -81,7 +81,7 @@ private:    void initAvailableCapabilitiesForVulkan(const SPIRVSubtarget &ST);  public: -  RequirementHandler() {} +  RequirementHandler() = default;    void clear() {      MinimalCaps.clear();      AllCaps.clear(); diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 7dd0b95..5ba0356 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -69,7 +69,7 @@ static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {  }  // Pin SPIRVTargetObjectFile's vtables to this file. -SPIRVTargetObjectFile::~SPIRVTargetObjectFile() {} +SPIRVTargetObjectFile::~SPIRVTargetObjectFile() = default;  SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,                                         StringRef CPU, StringRef FS, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h index 9d0adbb..87ec256 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h @@ -16,7 +16,7 @@ namespace llvm {  /// This implementation is used for SystemZ ELF targets.  class SystemZELFTargetObjectFile : public TargetLoweringObjectFileELF {  public: -  SystemZELFTargetObjectFile() {} +  SystemZELFTargetObjectFile() = default;    /// Describe a TLS variable address within debug info.    
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h index 7845cdf..1bfc61f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h @@ -76,7 +76,7 @@ public:      BlockSet.insert(MBB);    }    ArrayRef<MachineBasicBlock *> getBlocks() const { return Blocks; } -  using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator; +  using block_iterator = ArrayRef<MachineBasicBlock *>::const_iterator;    block_iterator block_begin() const { return getBlocks().begin(); }    block_iterator block_end() const { return getBlocks().end(); }    inline iterator_range<block_iterator> blocks() const { @@ -96,7 +96,7 @@ public:    void addSubException(std::unique_ptr<WebAssemblyException> E) {      SubExceptions.push_back(std::move(E));    } -  using iterator = typename decltype(SubExceptions)::const_iterator; +  using iterator = decltype(SubExceptions)::const_iterator;    iterator begin() const { return SubExceptions.begin(); }    iterator end() const { return SubExceptions.end(); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h index e92bf17..96b8a4e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h @@ -35,7 +35,7 @@ public:    virtual MachineBasicBlock *getHeader() const = 0;    virtual bool contains(const MachineBasicBlock *MBB) const = 0;    virtual unsigned getNumBlocks() const = 0; -  using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator; +  using block_iterator = ArrayRef<MachineBasicBlock *>::const_iterator;    virtual iterator_range<block_iterator> blocks() const = 0;    virtual bool isLoop() const = 0;  }; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp index 133406b..b97b508 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33034,12 +33034,13 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,        DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));    Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy) -                      : (Type *)FixedVectorType::get(ArgTy, 4); +                      : (Type *)FixedVectorType::get(ArgTy, 2);    TargetLowering::CallLoweringInfo CLI(DAG);    CLI.setDebugLoc(dl)        .setChain(DAG.getEntryNode()) -      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args)); +      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args)) +      .setIsPostTypeLegalization();    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); @@ -53347,6 +53348,80 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,    return SDValue();  } +// Look for a RMW operation that only touches one bit of a larger than legal +// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value. +static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, +                              SelectionDAG &DAG, +                              const X86Subtarget &Subtarget) { +  using namespace SDPatternMatch; + +  // Only handle normal stores and its chain was a matching normal load. +  auto *Ld = dyn_cast<LoadSDNode>(St->getChain()); +  if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld || +      !ISD::isNormalLoad(Ld) || !Ld->isSimple() || +      Ld->getBasePtr() != St->getBasePtr() || +      Ld->getOffset() != St->getOffset()) +    return SDValue(); + +  SDValue LoadVal(Ld, 0); +  SDValue StoredVal = St->getValue(); +  EVT VT = StoredVal.getValueType(); + +  // Only narrow larger than legal scalar integers. +  if (!VT.isScalarInteger() || +      VT.getSizeInBits() <= (Subtarget.is64Bit() ? 
64 : 32)) +    return SDValue(); + +  // BTR: X & ~(1 << ShAmt) +  // BTS: X | (1 << ShAmt) +  // BTC: X ^ (1 << ShAmt) +  SDValue ShAmt; +  if (!StoredVal.hasOneUse() || +      !(sd_match(StoredVal, m_And(m_Specific(LoadVal), +                                  m_Not(m_Shl(m_One(), m_Value(ShAmt))))) || +        sd_match(StoredVal, +                 m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) || +        sd_match(StoredVal, +                 m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))))) +    return SDValue(); + +  // Ensure the shift amount is in bounds. +  KnownBits KnownAmt = DAG.computeKnownBits(ShAmt); +  if (KnownAmt.getMaxValue().uge(VT.getSizeInBits())) +    return SDValue(); + +  // Split the shift into an alignment shift that moves the active i32 block to +  // the bottom bits for truncation and a modulo shift that can act on the i32. +  EVT AmtVT = ShAmt.getValueType(); +  SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, +                                 DAG.getSignedConstant(-32LL, DL, AmtVT)); +  SDValue ModuloAmt = +      DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT)); + +  // Compute the byte offset for the i32 block that is changed by the RMW. +  // combineTruncate will adjust the load for us in a similar way. +  EVT PtrVT = St->getBasePtr().getValueType(); +  SDValue PtrBitOfs = DAG.getZExtOrTrunc(AlignAmt, DL, PtrVT); +  SDValue PtrByteOfs = DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs, +                                   DAG.getShiftAmountConstant(3, PtrVT, DL)); +  SDValue NewPtr = DAG.getMemBasePlusOffset(St->getBasePtr(), PtrByteOfs, DL, +                                            SDNodeFlags::NoUnsignedWrap); + +  // Reconstruct the BTC/BTR/BTS pattern for the i32 block and store. 
+  SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt); +  X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); + +  SDValue Mask = +      DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32), +                  DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8)); +  if (StoredVal.getOpcode() == ISD::AND) +    Mask = DAG.getNOT(DL, Mask, MVT::i32); + +  SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask); +  return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(), +                      Align(), St->getMemOperand()->getFlags()); +} +  static SDValue combineStore(SDNode *N, SelectionDAG &DAG,                              TargetLowering::DAGCombinerInfo &DCI,                              const X86Subtarget &Subtarget) { @@ -53573,6 +53648,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,      }    } +  if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget)) +    return R; +    // Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)    //         store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC)    if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) && @@ -54505,8 +54583,9 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,    // truncation, see if we can convert the shift into a pointer offset instead.    // Limit this to normal (non-ext) scalar integer loads.    
if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL && -      Src.hasOneUse() && Src.getOperand(0).hasOneUse() && -      ISD::isNormalLoad(Src.getOperand(0).getNode())) { +      Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) && +      (Src.getOperand(0).hasOneUse() || +       !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, SrcVT))) {      auto *Ld = cast<LoadSDNode>(Src.getOperand(0));      if (Ld->isSimple() && VT.isByteSized() &&          isPowerOf2_64(VT.getSizeInBits())) { @@ -54529,8 +54608,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,          SDValue NewLoad =              DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(),                          Align(), Ld->getMemOperand()->getFlags()); -        DAG.ReplaceAllUsesOfValueWith(Src.getOperand(0).getValue(1), -                                      NewLoad.getValue(1)); +        DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);          return NewLoad;        }      } @@ -56306,6 +56384,7 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,  static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,                              TargetLowering::DAGCombinerInfo &DCI,                              const X86Subtarget &Subtarget) { +  using namespace SDPatternMatch;    const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();    const SDValue LHS = N->getOperand(0);    const SDValue RHS = N->getOperand(1); @@ -56364,6 +56443,37 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,        if (SDValue AndN = MatchAndCmpEq(RHS, LHS))          return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); +      // If we're performing a bit test on a larger than legal type, attempt +      // to (aligned) shift down the value to the bottom 32-bits and then +      // perform the bittest on the i32 value. 
+      // ICMP_ZERO(AND(X,SHL(1,IDX))) +      // --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31)))) +      if (isNullConstant(RHS) && +          OpVT.getScalarSizeInBits() > (Subtarget.is64Bit() ? 64 : 32)) { +        SDValue X, ShAmt; +        if (sd_match(LHS, m_OneUse(m_And(m_Value(X), +                                         m_Shl(m_One(), m_Value(ShAmt)))))) { +          // Only attempt this if the shift amount is known to be in bounds. +          KnownBits KnownAmt = DAG.computeKnownBits(ShAmt); +          if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) { +            EVT AmtVT = ShAmt.getValueType(); +            SDValue AlignAmt = +                DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, +                            DAG.getSignedConstant(-32LL, DL, AmtVT)); +            SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, +                                            DAG.getConstant(31, DL, AmtVT)); +            SDValue Mask = DAG.getNode( +                ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32), +                DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8)); +            X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt); +            X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); +            X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask); +            return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32), +                                CC); +          } +        } +      } +        // cmpeq(trunc(x),C) --> cmpeq(x,C)        // cmpne(trunc(x),C) --> cmpne(x,C)        // iff x upper bits are zero. 
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp index 090060e..3b96e70 100644 --- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp +++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp @@ -115,9 +115,9 @@ struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> {    static constexpr MachineInstr *const ArgNodeSentinel = nullptr;    using GraphT = ImmutableGraph<MachineInstr *, int>; -  using Node = typename GraphT::Node; -  using Edge = typename GraphT::Edge; -  using size_type = typename GraphT::size_type; +  using Node = GraphT::Node; +  using Edge = GraphT::Edge; +  using size_type = GraphT::size_type;    MachineGadgetGraph(std::unique_ptr<Node[]> Nodes,                       std::unique_ptr<Edge[]> Edges, size_type NodesSize,                       size_type EdgesSize, int NumFences = 0, int NumGadgets = 0) @@ -191,10 +191,10 @@ template <>  struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {    using GraphType = MachineGadgetGraph;    using Traits = llvm::GraphTraits<GraphType *>; -  using NodeRef = typename Traits::NodeRef; -  using EdgeRef = typename Traits::EdgeRef; -  using ChildIteratorType = typename Traits::ChildIteratorType; -  using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType; +  using NodeRef = Traits::NodeRef; +  using EdgeRef = Traits::EdgeRef; +  using ChildIteratorType = Traits::ChildIteratorType; +  using ChildEdgeIteratorType = Traits::ChildEdgeIteratorType;    DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} @@ -227,9 +227,6 @@ struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {  } // end namespace llvm -constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel; -constexpr int MachineGadgetGraph::GadgetEdgeSentinel; -  char X86LoadValueInjectionLoadHardeningPass::ID = 0;  void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage( @@ -335,7 +332,7 
@@ X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(    L.computePhiInfo();    GraphBuilder Builder; -  using GraphIter = typename GraphBuilder::BuilderNodeRef; +  using GraphIter = GraphBuilder::BuilderNodeRef;    DenseMap<MachineInstr *, GraphIter> NodeMap;    int FenceCount = 0, GadgetCount = 0;    auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) { diff --git a/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp b/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp index cda07e8..f55bc9c 100644 --- a/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp +++ b/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp @@ -32,7 +32,7 @@ using namespace llvm::MachO;  using namespace llvm::MachO::DylibReader;  using TripleVec = std::vector<Triple>; -static typename TripleVec::iterator emplace(TripleVec &Container, Triple &&T) { +static TripleVec::iterator emplace(TripleVec &Container, Triple &&T) {    auto I = partition_point(Container, [=](const Triple &CT) {      return std::forward_as_tuple(CT.getArch(), CT.getOS(),                                   CT.getEnvironment()) < diff --git a/llvm/lib/TextAPI/RecordVisitor.cpp b/llvm/lib/TextAPI/RecordVisitor.cpp index d333b33..24971a7 100644 --- a/llvm/lib/TextAPI/RecordVisitor.cpp +++ b/llvm/lib/TextAPI/RecordVisitor.cpp @@ -15,7 +15,7 @@  using namespace llvm;  using namespace llvm::MachO; -RecordVisitor::~RecordVisitor() {} +RecordVisitor::~RecordVisitor() = default;  void RecordVisitor::visitObjCInterface(const ObjCInterfaceRecord &) {}  void RecordVisitor::visitObjCCategory(const ObjCCategoryRecord &) {} diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h index e05fe28..1e549f1 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCloner.h +++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h @@ -77,7 +77,7 @@ public:        : OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind),          Builder(OrigF.getContext()), TTI(TTI) {} -  virtual ~BaseCloner() {} +  virtual ~BaseCloner() = 
default;    /// Create a clone for a continuation lowering.    static Function *createClone(Function &OrigF, const Twine &Suffix, diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 5048561..5ed47ae 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -3619,7 +3619,7 @@ struct AAIntraFnReachabilityFunction final        return true;      RQITy StackRQI(A, From, To, ExclusionSet, false); -    typename RQITy::Reachable Result; +    RQITy::Reachable Result;      if (!NonConstThis->checkQueryCache(A, StackRQI, Result))        return NonConstThis->isReachableImpl(A, StackRQI,                                             /*IsTemporaryRQI=*/true); @@ -10701,7 +10701,7 @@ struct AAInterFnReachabilityFunction      auto *NonConstThis = const_cast<AAInterFnReachabilityFunction *>(this);      RQITy StackRQI(A, From, To, ExclusionSet, false); -    typename RQITy::Reachable Result; +    RQITy::Reachable Result;      if (!NonConstThis->checkQueryCache(A, StackRQI, Result))        return NonConstThis->isReachableImpl(A, StackRQI,                                             /*IsTemporaryRQI=*/true); diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 894d83f..d35ae47 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -1034,11 +1034,11 @@ private:  } // namespace  template <> -struct llvm::DenseMapInfo<typename CallsiteContextGraph< +struct llvm::DenseMapInfo<CallsiteContextGraph<      ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>      : public DenseMapInfo<std::pair<Instruction *, unsigned>> {};  template <> -struct llvm::DenseMapInfo<typename CallsiteContextGraph< +struct llvm::DenseMapInfo<CallsiteContextGraph<      IndexCallsiteContextGraph, FunctionSummary, 
IndexCall>::CallInfo>      : public DenseMapInfo<std::pair<IndexCall, unsigned>> {};  template <> diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index d7eb745..2a87a0f 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -208,7 +208,7 @@ namespace KernelInfo {  // };  #define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX)                                    \ -  constexpr const unsigned MEMBER##Idx = IDX; +  constexpr unsigned MEMBER##Idx = IDX;  KERNEL_ENVIRONMENT_IDX(Configuration, 0)  KERNEL_ENVIRONMENT_IDX(Ident, 1) @@ -216,7 +216,7 @@ KERNEL_ENVIRONMENT_IDX(Ident, 1)  #undef KERNEL_ENVIRONMENT_IDX  #define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX)                      \ -  constexpr const unsigned MEMBER##Idx = IDX; +  constexpr unsigned MEMBER##Idx = IDX;  KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0)  KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1) @@ -258,7 +258,7 @@ KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxTeams)  GlobalVariable *  getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) { -  constexpr const int InitKernelEnvironmentArgNo = 0; +  constexpr int InitKernelEnvironmentArgNo = 0;    return cast<GlobalVariable>(        KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo)            ->stripPointerCasts()); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 3ddf182..cbaff29 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3997,6 +3997,27 @@ static Value *foldOrUnsignedUMulOverflowICmp(BinaryOperator &I,    return nullptr;  } +/// Fold select(X >s 0, 0, -X) | smax(X, 0) --> abs(X) +///      select(X <s 0, -X, 0) | smax(X, 0) --> abs(X) +static Value *FoldOrOfSelectSmaxToAbs(BinaryOperator &I, +                                      InstCombiner::BuilderTy 
&Builder) { +  Value *X; +  Value *Sel; +  if (match(&I, +            m_c_Or(m_Value(Sel), m_OneUse(m_SMax(m_Value(X), m_ZeroInt()))))) { +    auto NegX = m_Neg(m_Specific(X)); +    if (match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(X), +                                           m_ZeroInt()), +                            m_ZeroInt(), NegX)) || +        match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(X), +                                           m_ZeroInt()), +                            NegX, m_ZeroInt()))) +      return Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, +                                           Builder.getFalse()); +  } +  return nullptr; +} +  // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches  // here. We should standardize that construct where it is needed or choose some  // other way to ensure that commutated variants of patterns are not missed. @@ -4545,6 +4566,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {      if (Value *V = SimplifyAddWithRemainder(I))        return replaceInstUsesWith(I, V); +  if (Value *Res = FoldOrOfSelectSmaxToAbs(I, Builder)) +    return replaceInstUsesWith(I, Res); +    return nullptr;  } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index f5130da..9572f9d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3599,6 +3599,21 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {                                   m_Not(m_Specific(SelCond->getTrueValue())));        if (MayNeedFreeze)          C = Builder.CreateFreeze(C); +      if (!ProfcheckDisableMetadataFixes) { +        Value *C2 = nullptr, *A2 = nullptr, *B2 = nullptr; +        if (match(CondVal, m_LogicalAnd(m_Specific(C), m_Value(A2))) && +            SelCond) { +          return SelectInst::Create(C, A, B, "", 
nullptr, SelCond); +        } else if (match(FalseVal, +                         m_LogicalAnd(m_Not(m_Value(C2)), m_Value(B2))) && +                   SelFVal) { +          SelectInst *NewSI = SelectInst::Create(C, A, B, "", nullptr, SelFVal); +          NewSI->swapProfMetadata(); +          return NewSI; +        } else { +          return createSelectInstWithUnknownProfile(C, A, B); +        } +      }        return SelectInst::Create(C, A, B);      } @@ -3615,6 +3630,20 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {                                   m_Not(m_Specific(SelFVal->getTrueValue())));        if (MayNeedFreeze)          C = Builder.CreateFreeze(C); +      if (!ProfcheckDisableMetadataFixes) { +        Value *C2 = nullptr, *A2 = nullptr, *B2 = nullptr; +        if (match(CondVal, m_LogicalAnd(m_Not(m_Value(C2)), m_Value(A2))) && +            SelCond) { +          SelectInst *NewSI = SelectInst::Create(C, B, A, "", nullptr, SelCond); +          NewSI->swapProfMetadata(); +          return NewSI; +        } else if (match(FalseVal, m_LogicalAnd(m_Specific(C), m_Value(B2))) && +                   SelFVal) { +          return SelectInst::Create(C, B, A, "", nullptr, SelFVal); +        } else { +          return createSelectInstWithUnknownProfile(C, B, A); +        } +      }        return SelectInst::Create(C, B, A);      }    } diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp index 80e77e09..a2fad02 100644 --- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp @@ -161,7 +161,7 @@ template <char NsanTypeId>  class ShadowTypeConfigImpl : public ShadowTypeConfig {  public:    char getNsanTypeId() const override { return NsanTypeId; } -  static constexpr const char kNsanTypeId = NsanTypeId; +  static constexpr char kNsanTypeId = NsanTypeId;  };  // 
`double` (`d`) shadow type. diff --git a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp index 89980d5..a577f51 100644 --- a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp +++ b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp @@ -122,7 +122,8 @@ DropUnnecessaryAssumesPass::run(Function &F, FunctionAnalysisManager &FAM) {      Value *Cond = Assume->getArgOperand(0);      // Don't drop type tests, which have special semantics. -    if (match(Cond, m_Intrinsic<Intrinsic::type_test>())) +    if (match(Cond, m_Intrinsic<Intrinsic::type_test>()) || +        match(Cond, m_Intrinsic<Intrinsic::public_type_test>()))        continue;      SmallVector<Value *> Affected; diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp index a06f832..d564e32 100644 --- a/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -514,7 +514,7 @@ public:  class GVNSink {  public: -  GVNSink() {} +  GVNSink() = default;    bool run(Function &F) {      LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 0f3978f..0a8f5ea 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -143,8 +143,8 @@ struct SubGraphTraits {    class WrappedSuccIterator        : public iterator_adaptor_base<              WrappedSuccIterator, BaseSuccIterator, -            typename std::iterator_traits<BaseSuccIterator>::iterator_category, -            NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { +            std::iterator_traits<BaseSuccIterator>::iterator_category, NodeRef, +            std::ptrdiff_t, NodeRef *, NodeRef> {      SmallDenseSet<RegionNode *> *Nodes;    public: @@ -558,11 +558,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {    } else {      // Test for successors as 
back edge      BasicBlock *BB = N->getNodeAs<BasicBlock>(); -    BranchInst *Term = cast<BranchInst>(BB->getTerminator()); - -    for (BasicBlock *Succ : Term->successors()) -      if (Visited.count(Succ)) -        Loops[Succ] = BB; +    if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator())) +      for (BasicBlock *Succ : Term->successors()) +        if (Visited.count(Succ)) +          Loops[Succ] = BB;    }  } @@ -594,7 +593,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {    for (BasicBlock *P : predecessors(BB)) {      // Ignore it if it's a branch from outside into our region entry -    if (!ParentRegion->contains(P)) +    if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator()))        continue;      Region *R = RI->getRegionFor(P); @@ -1402,13 +1401,17 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {  /// Run the transformation for each region found  bool StructurizeCFG::run(Region *R, DominatorTree *DT,                           const TargetTransformInfo *TTI) { -  if (R->isTopLevelRegion()) +  // CallBr and its corresponding direct target blocks are for now ignored by +  // this pass. This is not a limitation for the currently intended uses cases +  // of callbr in the AMDGPU backend. +  // Parent and child regions are not affected by this (current) restriction. +  // See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details. 
+  if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))      return false;    this->DT = DT;    this->TTI = TTI;    Func = R->getEntry()->getParent(); -  assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");    ParentRegion = R; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 46f2903..a03cf6e 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3416,7 +3416,11 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C,    // Create integer constant expression.    auto createIntegerExpression = [&DIB](const Constant &CV) -> DIExpression * {      const APInt &API = cast<ConstantInt>(&CV)->getValue(); -    std::optional<int64_t> InitIntOpt = API.trySExtValue(); +    std::optional<int64_t> InitIntOpt; +    if (API.getBitWidth() == 1) +      InitIntOpt = API.tryZExtValue(); +    else +      InitIntOpt = API.trySExtValue();      return InitIntOpt ? 
DIB.createConstantValueExpression(                              static_cast<uint64_t>(*InitIntOpt))                        : nullptr; diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 8be471b..6e60b94 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -992,9 +992,12 @@ BranchProbability llvm::getBranchProbability(BranchInst *B,    uint64_t Weight0, Weight1;    if (!extractBranchWeights(*B, Weight0, Weight1))      return BranchProbability::getUnknown(); +  uint64_t Denominator = Weight0 + Weight1; +  if (Denominator == 0) +    return BranchProbability::getUnknown();    if (!ForFirstTarget)      std::swap(Weight0, Weight1); -  return BranchProbability::getBranchProbability(Weight0, Weight0 + Weight1); +  return BranchProbability::getBranchProbability(Weight0, Denominator);  }  bool llvm::setBranchProbability(BranchInst *B, BranchProbability P, diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index 94c5c170..e86ab13 100644 --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -158,6 +158,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {    SmallVector<BasicBlock *, 8> CallBrTargetBlocksToFix;    // Redirect exiting edges through a control flow hub.    ControlFlowHub CHub; +  bool Changed = false;    for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {      BasicBlock *BB = ExitingBlocks[I]; @@ -182,6 +183,10 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {          bool UpdatedLI = false;          BasicBlock *NewSucc =              SplitCallBrEdge(BB, Succ, J, &DTU, nullptr, &LI, &UpdatedLI); +        // SplitCallBrEdge modifies the CFG because it creates an intermediate +        // block. So we need to set the changed flag no matter what the +        // ControlFlowHub is going to do later. 
+        Changed = true;          // Even if CallBr and Succ do not have a common parent loop, we need to          // add the new target block to the parent loop of the current loop.          if (!UpdatedLI) @@ -207,6 +212,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {    bool ChangedCFG;    std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize(        &DTU, GuardBlocks, "loop.exit", MaxBooleansInControlFlowHub.getValue()); +  ChangedCFG |= Changed;    if (!ChangedCFG)      return false; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 34b405c..bf3f52c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -20975,6 +20975,27 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,    if (isa<PHINode>(S.getMainOp()) ||        isVectorLikeInstWithConstOps(S.getMainOp()))      return nullptr; +  // If the parent node is non-schedulable and the current node is copyable, and +  // any of parent instructions are used outside several basic blocks or in +  // bin-op node - cancel scheduling, it may cause wrong def-use deps in +  // analysis, leading to a crash. +  // Non-scheduled nodes may not have related ScheduleData model, which may lead +  // to a skipped dep analysis. 
+  if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() && +      EI.UserTE->doesNotNeedToSchedule() && +      EI.UserTE->getOpcode() != Instruction::PHI && +      any_of(EI.UserTE->Scalars, [](Value *V) { +        auto *I = dyn_cast<Instruction>(V); +        if (!I || I->hasOneUser()) +          return false; +        for (User *U : I->users()) { +          auto *UI = cast<Instruction>(U); +          if (isa<BinaryOperator>(UI)) +            return true; +        } +        return false; +      })) +    return std::nullopt;    bool HasCopyables = S.areInstructionsWithCopyableElements();    if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||         all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) { diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp index 9c869dd..d354933 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp @@ -92,7 +92,7 @@ void MemDGNode::print(raw_ostream &OS, bool PrintDeps) const {    DGNode::print(OS, false);    if (PrintDeps) {      // Print memory preds. 
-    static constexpr const unsigned Indent = 4; +    static constexpr unsigned Indent = 4;      for (auto *Pred : MemPreds)        OS.indent(Indent) << "<-" << *Pred->getInstruction() << "\n";    } diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index 86dbd21..5534da9 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -25,14 +25,14 @@ static cl::opt<bool>                            "emit new instructions (*very* expensive)."));  #endif // NDEBUG -static constexpr const unsigned long StopAtDisabled = +static constexpr unsigned long StopAtDisabled =      std::numeric_limits<unsigned long>::max();  static cl::opt<unsigned long>      StopAt("sbvec-stop-at", cl::init(StopAtDisabled), cl::Hidden,             cl::desc("Vectorize if the invocation count is < than this. 0 "                      "disables vectorization.")); -static constexpr const unsigned long StopBundleDisabled = +static constexpr unsigned long StopBundleDisabled =      std::numeric_limits<unsigned long>::max();  static cl::opt<unsigned long>      StopBundle("sbvec-stop-bndl", cl::init(StopBundleDisabled), cl::Hidden, diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp index ed2f80b..2de6921 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp @@ -43,7 +43,7 @@ cl::opt<std::string> AllowFiles(      "sbvec-allow-files", cl::init(".*"), cl::Hidden,      cl::desc("Run the vectorizer only on file paths that match any in the "               "list of comma-separated regex's.")); -static constexpr const char AllowFilesDelim = ','; +static constexpr char AllowFilesDelim = ',';  
SandboxVectorizerPass::SandboxVectorizerPass() : FPM("fpm") {    if (UserDefinedPassPipeline == DefaultPipelineMagicStr) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 9081ad7..cfe1f1e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -939,7 +939,7 @@ class VPIRMetadata {    SmallVector<std::pair<unsigned, MDNode *>> Metadata;  public: -  VPIRMetadata() {} +  VPIRMetadata() = default;    /// Adds metatadata that can be preserved from the original instruction    /// \p I. @@ -950,12 +950,9 @@ public:    VPIRMetadata(Instruction &I, LoopVersioning *LVer);    /// Copy constructor for cloning. -  VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {} +  VPIRMetadata(const VPIRMetadata &Other) = default; -  VPIRMetadata &operator=(const VPIRMetadata &Other) { -    Metadata = Other.Metadata; -    return *this; -  } +  VPIRMetadata &operator=(const VPIRMetadata &Other) = default;    /// Add all metadata to \p I.    
void applyMetadata(Instruction &I) const; @@ -1113,9 +1110,8 @@ public:    VP_CLASSOF_IMPL(VPDef::VPInstructionSC)    VPInstruction *clone() override { -    SmallVector<VPValue *, 2> Operands(operands()); -    auto *New = -        new VPInstruction(Opcode, Operands, *this, *this, getDebugLoc(), Name); +    auto *New = new VPInstruction(Opcode, operands(), *this, *this, +                                  getDebugLoc(), Name);      if (getUnderlyingValue())        New->setUnderlyingValue(getUnderlyingInstr());      return New; @@ -1229,10 +1225,9 @@ public:    }    VPInstruction *clone() override { -    SmallVector<VPValue *, 2> Operands(operands());      auto *New = -        new VPInstructionWithType(getOpcode(), Operands, getResultType(), *this, -                                  getDebugLoc(), getName()); +        new VPInstructionWithType(getOpcode(), operands(), getResultType(), +                                  *this, getDebugLoc(), getName());      New->setUnderlyingValue(getUnderlyingValue());      return New;    } @@ -3214,6 +3209,9 @@ protected:        : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),          Alignment(Alignment), Consecutive(Consecutive), Reverse(Reverse) {      assert((Consecutive || !Reverse) && "Reverse implies consecutive"); +    assert(isa<VPVectorEndPointerRecipe>(getAddr()) || +           !Reverse && +               "Reversed acccess without VPVectorEndPointerRecipe address?");    }  public: @@ -3985,7 +3983,7 @@ class VPIRBasicBlock : public VPBasicBlock {          IRBB(IRBB) {}  public: -  ~VPIRBasicBlock() override {} +  ~VPIRBasicBlock() override = default;    static inline bool classof(const VPBlockBase *V) {      return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC; @@ -4037,7 +4035,7 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {          IsReplicator(IsReplicator) {}  public: -  ~VPRegionBlock() override {} +  ~VPRegionBlock() override = default;    /// Method to support type 
inquiry through isa, cast, and dyn_cast.    static inline bool classof(const VPBlockBase *V) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index b5b98c6..b57c448 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -313,7 +313,8 @@ private:      // Check for recipes that do not have opcodes.      if constexpr (std::is_same_v<RecipeTy, VPScalarIVStepsRecipe> ||                    std::is_same_v<RecipeTy, VPCanonicalIVPHIRecipe> || -                  std::is_same_v<RecipeTy, VPDerivedIVRecipe>) +                  std::is_same_v<RecipeTy, VPDerivedIVRecipe> || +                  std::is_same_v<RecipeTy, VPVectorEndPointerRecipe>)        return DefR;      else        return DefR && DefR->getOpcode() == Opcode; @@ -686,6 +687,64 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {    return VPDerivedIV_match<Op0_t, Op1_t, Op2_t>({Op0, Op1, Op2});  } +template <typename Addr_t, typename Mask_t> struct Load_match { +  Addr_t Addr; +  Mask_t Mask; + +  Load_match(Addr_t Addr, Mask_t Mask) : Addr(Addr), Mask(Mask) {} + +  template <typename OpTy> bool match(const OpTy *V) const { +    auto *Load = dyn_cast<VPWidenLoadRecipe>(V); +    if (!Load || !Addr.match(Load->getAddr()) || !Load->isMasked() || +        !Mask.match(Load->getMask())) +      return false; +    return true; +  } +}; + +/// Match a (possibly reversed) masked load. 
+template <typename Addr_t, typename Mask_t> +inline Load_match<Addr_t, Mask_t> m_MaskedLoad(const Addr_t &Addr, +                                               const Mask_t &Mask) { +  return Load_match<Addr_t, Mask_t>(Addr, Mask); +} + +template <typename Addr_t, typename Val_t, typename Mask_t> struct Store_match { +  Addr_t Addr; +  Val_t Val; +  Mask_t Mask; + +  Store_match(Addr_t Addr, Val_t Val, Mask_t Mask) +      : Addr(Addr), Val(Val), Mask(Mask) {} + +  template <typename OpTy> bool match(const OpTy *V) const { +    auto *Store = dyn_cast<VPWidenStoreRecipe>(V); +    if (!Store || !Addr.match(Store->getAddr()) || +        !Val.match(Store->getStoredValue()) || !Store->isMasked() || +        !Mask.match(Store->getMask())) +      return false; +    return true; +  } +}; + +/// Match a (possibly reversed) masked store. +template <typename Addr_t, typename Val_t, typename Mask_t> +inline Store_match<Addr_t, Val_t, Mask_t> +m_MaskedStore(const Addr_t &Addr, const Val_t &Val, const Mask_t &Mask) { +  return Store_match<Addr_t, Val_t, Mask_t>(Addr, Val, Mask); +} + +template <typename Op0_t, typename Op1_t> +using VectorEndPointerRecipe_match = +    Recipe_match<std::tuple<Op0_t, Op1_t>, 0, +                 /*Commutative*/ false, VPVectorEndPointerRecipe>; + +template <typename Op0_t, typename Op1_t> +VectorEndPointerRecipe_match<Op0_t, Op1_t> m_VecEndPtr(const Op0_t &Op0, +                                                       const Op1_t &Op1) { +  return VectorEndPointerRecipe_match<Op0_t, Op1_t>(Op0, Op1); +} +  /// Match a call argument at a given argument index.  template <typename Opnd_t> struct Argument_match {    /// Call argument index to match. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1a02117..1ee405a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -162,8 +162,12 @@ bool VPRecipeBase::mayHaveSideEffects() const {    case VPPredInstPHISC:    case VPVectorEndPointerSC:      return false; -  case VPInstructionSC: -    return mayWriteToMemory(); +  case VPInstructionSC: { +    auto *VPI = cast<VPInstruction>(this); +    return mayWriteToMemory() || +           VPI->getOpcode() == VPInstruction::BranchOnCount || +           VPI->getOpcode() == VPInstruction::BranchOnCond; +  }    case VPWidenCallSC: {      Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();      return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn(); @@ -1241,6 +1245,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {    case Instruction::Select:    case Instruction::PHI:    case VPInstruction::AnyOf: +  case VPInstruction::BranchOnCond: +  case VPInstruction::BranchOnCount:    case VPInstruction::Broadcast:    case VPInstruction::BuildStructVector:    case VPInstruction::BuildVector: diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.h b/llvm/lib/Transforms/Vectorize/VPlanSLP.h index 77ff36c..44972c68 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanSLP.h +++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.h @@ -89,8 +89,7 @@ class VPlanSlp {    /// Width of the widest combined bundle in bits.    unsigned WidestBundleBits = 0; -  using MultiNodeOpTy = -      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>; +  using MultiNodeOpTy = std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;    // Input operand bundles for the current multi node. Each multi node operand    // bundle contains values not matching the multi node's opcode. 
They will diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index f50bf29..9d9bb14 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -151,7 +151,27 @@ static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {  static bool sinkScalarOperands(VPlan &Plan) {    auto Iter = vp_depth_first_deep(Plan.getEntry()); +  bool ScalarVFOnly = Plan.hasScalarVFOnly();    bool Changed = false; + +  auto IsValidSinkCandidate = [ScalarVFOnly](VPBasicBlock *SinkTo, +                                             VPSingleDefRecipe *Candidate) { +    // We only know how to duplicate VPReplicateRecipes and +    // VPScalarIVStepsRecipes for now. +    if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Candidate)) +      return false; + +    if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() || +        Candidate->mayReadOrWriteMemory()) +      return false; + +    if (auto *RepR = dyn_cast<VPReplicateRecipe>(Candidate)) +      if (!ScalarVFOnly && RepR->isSingleScalar()) +        return false; + +    return true; +  }; +    // First, collect the operands of all recipes in replicate blocks as seeds for    // sinking.    
SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList; @@ -159,51 +179,37 @@ static bool sinkScalarOperands(VPlan &Plan) {      VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();      if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)        continue; -    VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]); -    if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock()) +    VPBasicBlock *VPBB = cast<VPBasicBlock>(EntryVPBB->getSuccessors().front()); +    if (VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())        continue;      for (auto &Recipe : *VPBB) { -      for (VPValue *Op : Recipe.operands()) +      for (VPValue *Op : Recipe.operands()) {          if (auto *Def =                  dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe())) -          WorkList.insert({VPBB, Def}); +          if (IsValidSinkCandidate(VPBB, Def)) +            WorkList.insert({VPBB, Def}); +      }      }    } -  bool ScalarVFOnly = Plan.hasScalarVFOnly();    // Try to sink each replicate or scalar IV steps recipe in the worklist.    for (unsigned I = 0; I != WorkList.size(); ++I) {      VPBasicBlock *SinkTo;      VPSingleDefRecipe *SinkCandidate;      std::tie(SinkTo, SinkCandidate) = WorkList[I]; -    if (SinkCandidate->getParent() == SinkTo || -        SinkCandidate->mayHaveSideEffects() || -        SinkCandidate->mayReadOrWriteMemory()) -      continue; -    if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) { -      if (!ScalarVFOnly && RepR->isSingleScalar()) -        continue; -    } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate)) -      continue; -    bool NeedsDuplicating = false;      // All recipe users of the sink candidate must be in the same block SinkTo -    // or all users outside of SinkTo must be uniform-after-vectorization ( -    // i.e., only first lane is used) . In the latter case, we need to duplicate -    // SinkCandidate. 
-    auto CanSinkWithUser = [SinkTo, &NeedsDuplicating, -                            SinkCandidate](VPUser *U) { -      auto *UI = cast<VPRecipeBase>(U); -      if (UI->getParent() == SinkTo) -        return true; -      NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate); -      // We only know how to duplicate VPReplicateRecipes and -      // VPScalarIVStepsRecipes for now. -      return NeedsDuplicating && -             isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(SinkCandidate); -    }; -    if (!all_of(SinkCandidate->users(), CanSinkWithUser)) +    // or all users outside of SinkTo must have only their first lane used. In +    // the latter case, we need to duplicate SinkCandidate. +    auto UsersOutsideSinkTo = +        make_filter_range(SinkCandidate->users(), [SinkTo](VPUser *U) { +          return cast<VPRecipeBase>(U)->getParent() != SinkTo; +        }); +    if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) { +          return !U->onlyFirstLaneUsed(SinkCandidate); +        }))        continue; +    bool NeedsDuplicating = !UsersOutsideSinkTo.empty();      if (NeedsDuplicating) {        if (ScalarVFOnly) @@ -230,7 +236,8 @@ static bool sinkScalarOperands(VPlan &Plan) {      for (VPValue *Op : SinkCandidate->operands())        if (auto *Def =                dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe())) -        WorkList.insert({SinkTo, Def}); +        if (IsValidSinkCandidate(SinkTo, Def)) +          WorkList.insert({SinkTo, Def});      Changed = true;    }    return Changed; @@ -1056,13 +1063,9 @@ static VPValue *tryToFoldLiveIns(VPSingleDefRecipe &R,    return nullptr;  } -/// Try to simplify recipe \p R. -static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { -  VPlan *Plan = R.getParent()->getPlan(); - -  auto *Def = dyn_cast<VPSingleDefRecipe>(&R); -  if (!Def) -    return; +/// Try to simplify VPSingleDefRecipe \p Def. 
+static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) { +  VPlan *Plan = Def->getParent()->getPlan();    // Simplification of live-in IR values for SingleDef recipes using    // InstSimplifyFolder. @@ -1072,7 +1075,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {      return Def->replaceAllUsesWith(V);    // Fold PredPHI LiveIn -> LiveIn. -  if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) { +  if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(Def)) {      VPValue *Op = PredPHI->getOperand(0);      if (Op->isLiveIn())        PredPHI->replaceAllUsesWith(Op); @@ -1091,12 +1094,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {          return;        if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) { -        unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue())) +        unsigned ExtOpcode = match(Def->getOperand(0), m_SExt(m_VPValue()))                                   ? Instruction::SExt                                   : Instruction::ZExt;          auto *Ext = Builder.createWidenCast(Instruction::CastOps(ExtOpcode), A,                                              TruncTy); -        if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) { +        if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {            // UnderlyingExt has distinct return type, used to retain legacy cost.            
Ext->setUnderlyingValue(UnderlyingExt);          } @@ -1159,7 +1162,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {          Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));    // x && !x -> 0 -  if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X))))) +  if (match(Def, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X)))))      return Def->replaceAllUsesWith(Plan->getFalse());    if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X)))) @@ -1187,8 +1190,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {      return Def->replaceAllUsesWith(A);    if (match(Def, m_c_Mul(m_VPValue(A), m_ZeroInt()))) -    return Def->replaceAllUsesWith(R.getOperand(0) == A ? R.getOperand(1) -                                                        : R.getOperand(0)); +    return Def->replaceAllUsesWith( +        Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));    if (match(Def, m_Not(m_VPValue(A)))) {      if (match(A, m_Not(m_VPValue(A)))) @@ -1217,8 +1220,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {          }          // If Cmp doesn't have a debug location, use the one from the negation,          // to preserve the location. 
-        if (!Cmp->getDebugLoc() && R.getDebugLoc()) -          Cmp->setDebugLoc(R.getDebugLoc()); +        if (!Cmp->getDebugLoc() && Def->getDebugLoc()) +          Cmp->setDebugLoc(Def->getDebugLoc());        }      }    } @@ -1244,7 +1247,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {    if (match(Def, m_Intrinsic<Intrinsic::vp_merge>(m_True(), m_VPValue(A),                                                    m_VPValue(X), m_VPValue())) &&        match(A, m_c_BinaryOr(m_Specific(X), m_VPValue(Y))) && -      TypeInfo.inferScalarType(R.getVPSingleValue())->isIntegerTy(1)) { +      TypeInfo.inferScalarType(Def)->isIntegerTy(1)) {      Def->setOperand(1, Def->getOperand(0));      Def->setOperand(0, Y);      return; @@ -1252,36 +1255,36 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {    if (auto *Phi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(Def)) {      if (Phi->getOperand(0) == Phi->getOperand(1)) -      Def->replaceAllUsesWith(Phi->getOperand(0)); +      Phi->replaceAllUsesWith(Phi->getOperand(0));      return;    }    // Look through ExtractLastElement (BuildVector ....). -  if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()), -                            m_ExtractLastLanePerPart(m_BuildVector())))) { -    auto *BuildVector = cast<VPInstruction>(R.getOperand(0)); +  if (match(Def, m_CombineOr(m_ExtractLastElement(m_BuildVector()), +                             m_ExtractLastLanePerPart(m_BuildVector())))) { +    auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));      Def->replaceAllUsesWith(          BuildVector->getOperand(BuildVector->getNumOperands() - 1));      return;    }    // Look through ExtractPenultimateElement (BuildVector ....). 
-  if (match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>( -                    m_BuildVector()))) { -    auto *BuildVector = cast<VPInstruction>(R.getOperand(0)); +  if (match(Def, m_VPInstruction<VPInstruction::ExtractPenultimateElement>( +                     m_BuildVector()))) { +    auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));      Def->replaceAllUsesWith(          BuildVector->getOperand(BuildVector->getNumOperands() - 2));      return;    }    uint64_t Idx; -  if (match(&R, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) { -    auto *BuildVector = cast<VPInstruction>(R.getOperand(0)); +  if (match(Def, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) { +    auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));      Def->replaceAllUsesWith(BuildVector->getOperand(Idx));      return;    } -  if (match(Def, m_BuildVector()) && all_equal(R.operands())) { +  if (match(Def, m_BuildVector()) && all_equal(Def->operands())) {      Def->replaceAllUsesWith(          Builder.createNaryOp(VPInstruction::Broadcast, Def->getOperand(0)));      return; @@ -1303,7 +1306,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {        isa<VPPhi>(X)) {      auto *Phi = cast<VPPhi>(X);      if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) && -        Phi->getNumUsers() == 1 && (*Phi->user_begin() == &R)) { +        Phi->getNumUsers() == 1 && (*Phi->user_begin() == Def)) {        Phi->setOperand(0, Y);        Def->replaceAllUsesWith(Phi);        return; @@ -1311,7 +1314,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {    }    // VPVectorPointer for part 0 can be replaced by their start pointer. 
-  if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) { +  if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(Def)) {      if (VecPtr->isFirstPart()) {        VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));        return; @@ -1366,9 +1369,9 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan) {        Plan.getEntry());    VPTypeAnalysis TypeInfo(Plan);    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) { -    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { -      simplifyRecipe(R, TypeInfo); -    } +    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) +      if (auto *Def = dyn_cast<VPSingleDefRecipe>(&R)) +        simplifyRecipe(Def, TypeInfo);    }  } @@ -2521,90 +2524,102 @@ void VPlanTransforms::addActiveLaneMask(    HeaderMask->eraseFromParent();  } +template <typename Op0_t, typename Op1_t> struct RemoveMask_match { +  Op0_t In; +  Op1_t &Out; + +  RemoveMask_match(const Op0_t &In, Op1_t &Out) : In(In), Out(Out) {} + +  template <typename OpTy> bool match(OpTy *V) const { +    if (m_Specific(In).match(V)) { +      Out = nullptr; +      return true; +    } +    if (m_LogicalAnd(m_Specific(In), m_VPValue(Out)).match(V)) +      return true; +    return false; +  } +}; + +/// Match a specific mask \p In, or a combination of it (logical-and In, Out). +/// Returns the remaining part \p Out if so, or nullptr otherwise. +template <typename Op0_t, typename Op1_t> +static inline RemoveMask_match<Op0_t, Op1_t> m_RemoveMask(const Op0_t &In, +                                                          Op1_t &Out) { +  return RemoveMask_match<Op0_t, Op1_t>(In, Out); +} +  /// Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding  /// EVL-based recipe without the header mask. Returns nullptr if no EVL-based  /// recipe could be created.  /// \p HeaderMask  Header Mask.  /// \p CurRecipe   Recipe to be transform.  /// \p TypeInfo    VPlan-based type analysis. 
-/// \p AllOneMask  The vector mask parameter of vector-predication intrinsics.  /// \p EVL         The explicit vector length parameter of vector-predication  /// intrinsics.  static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,                                         VPRecipeBase &CurRecipe, -                                       VPTypeAnalysis &TypeInfo, -                                       VPValue &AllOneMask, VPValue &EVL) { -  // FIXME: Don't transform recipes to EVL recipes if they're not masked by the -  // header mask. -  auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * { -    assert(OrigMask && "Unmasked recipe when folding tail"); -    // HeaderMask will be handled using EVL. -    VPValue *Mask; -    if (match(OrigMask, m_LogicalAnd(m_Specific(HeaderMask), m_VPValue(Mask)))) -      return Mask; -    return HeaderMask == OrigMask ? nullptr : OrigMask; -  }; +                                       VPTypeAnalysis &TypeInfo, VPValue &EVL) { +  VPlan *Plan = CurRecipe.getParent()->getPlan(); +  VPValue *Addr, *Mask, *EndPtr;    /// Adjust any end pointers so that they point to the end of EVL lanes not VF. 
-  auto GetNewAddr = [&CurRecipe, &EVL](VPValue *Addr) -> VPValue * { -    auto *EndPtr = dyn_cast<VPVectorEndPointerRecipe>(Addr); -    if (!EndPtr) -      return Addr; -    assert(EndPtr->getOperand(1) == &EndPtr->getParent()->getPlan()->getVF() && -           "VPVectorEndPointerRecipe with non-VF VF operand?"); -    assert( -        all_of(EndPtr->users(), -               [](VPUser *U) { -                 return cast<VPWidenMemoryRecipe>(U)->isReverse(); -               }) && -        "VPVectorEndPointRecipe not used by reversed widened memory recipe?"); -    VPVectorEndPointerRecipe *EVLAddr = EndPtr->clone(); -    EVLAddr->insertBefore(&CurRecipe); -    EVLAddr->setOperand(1, &EVL); -    return EVLAddr; +  auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) { +    auto *EVLEndPtr = cast<VPVectorEndPointerRecipe>(EndPtr)->clone(); +    EVLEndPtr->insertBefore(&CurRecipe); +    EVLEndPtr->setOperand(1, &EVL); +    return EVLEndPtr;    }; -  return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(&CurRecipe) -      .Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) { -        VPValue *NewMask = GetNewMask(L->getMask()); -        VPValue *NewAddr = GetNewAddr(L->getAddr()); -        return new VPWidenLoadEVLRecipe(*L, NewAddr, EVL, NewMask); -      }) -      .Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) { -        VPValue *NewMask = GetNewMask(S->getMask()); -        VPValue *NewAddr = GetNewAddr(S->getAddr()); -        return new VPWidenStoreEVLRecipe(*S, NewAddr, EVL, NewMask); -      }) -      .Case<VPInterleaveRecipe>([&](VPInterleaveRecipe *IR) { -        VPValue *NewMask = GetNewMask(IR->getMask()); -        return new VPInterleaveEVLRecipe(*IR, EVL, NewMask); -      }) -      .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) { -        VPValue *NewMask = GetNewMask(Red->getCondOp()); -        return new VPReductionEVLRecipe(*Red, EVL, NewMask); -      }) -      .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * { -        VPValue *LHS, 
*RHS; -        // Transform select with a header mask condition -        //   select(header_mask, LHS, RHS) -        // into vector predication merge. -        //   vp.merge(all-true, LHS, RHS, EVL) -        if (!match(VPI, m_Select(m_Specific(HeaderMask), m_VPValue(LHS), -                                 m_VPValue(RHS)))) -          return nullptr; -        // Use all true as the condition because this transformation is -        // limited to selects whose condition is a header mask. -        return new VPWidenIntrinsicRecipe( -            Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL}, -            TypeInfo.inferScalarType(LHS), VPI->getDebugLoc()); -      }) -      .Default([&](VPRecipeBase *R) { return nullptr; }); +  if (match(&CurRecipe, +            m_MaskedLoad(m_VPValue(Addr), m_RemoveMask(HeaderMask, Mask))) && +      !cast<VPWidenLoadRecipe>(CurRecipe).isReverse()) +    return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr, +                                    EVL, Mask); + +  if (match(&CurRecipe, +            m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) && +      match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) && +      cast<VPWidenLoadRecipe>(CurRecipe).isReverse()) +    return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), +                                    AdjustEndPtr(EndPtr), EVL, Mask); + +  if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(), +                                      m_RemoveMask(HeaderMask, Mask))) && +      !cast<VPWidenStoreRecipe>(CurRecipe).isReverse()) +    return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr, +                                     EVL, Mask); + +  if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(), +                                      m_RemoveMask(HeaderMask, Mask))) && +      match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) && +      
cast<VPWidenStoreRecipe>(CurRecipe).isReverse()) +    return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), +                                     AdjustEndPtr(EndPtr), EVL, Mask); + +  if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe)) +    if (Rdx->isConditional() && +        match(Rdx->getCondOp(), m_RemoveMask(HeaderMask, Mask))) +      return new VPReductionEVLRecipe(*Rdx, EVL, Mask); + +  if (auto *Interleave = dyn_cast<VPInterleaveRecipe>(&CurRecipe)) +    if (Interleave->getMask() && +        match(Interleave->getMask(), m_RemoveMask(HeaderMask, Mask))) +      return new VPInterleaveEVLRecipe(*Interleave, EVL, Mask); + +  VPValue *LHS, *RHS; +  if (match(&CurRecipe, +            m_Select(m_Specific(HeaderMask), m_VPValue(LHS), m_VPValue(RHS)))) +    return new VPWidenIntrinsicRecipe( +        Intrinsic::vp_merge, {Plan->getTrue(), LHS, RHS, &EVL}, +        TypeInfo.inferScalarType(LHS), CurRecipe.getDebugLoc()); + +  return nullptr;  }  /// Replace recipes with their EVL variants.  
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {    VPTypeAnalysis TypeInfo(Plan); -  VPValue *AllOneMask = Plan.getTrue();    VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();    VPBasicBlock *Header = LoopRegion->getEntryBasicBlock(); @@ -2664,7 +2679,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {              ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));          VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(              Intrinsic::experimental_vp_splice, -            {V1, V2, Imm, AllOneMask, PrevEVL, &EVL}, +            {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},              TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());          VPSplice->insertBefore(&R);          R.getVPSingleValue()->replaceAllUsesWith(VPSplice); @@ -2698,7 +2713,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {    for (VPUser *U : collectUsersRecursively(EVLMask)) {      auto *CurRecipe = cast<VPRecipeBase>(U);      VPRecipeBase *EVLRecipe = -        optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, *AllOneMask, EVL); +        optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, EVL);      if (!EVLRecipe)        continue; @@ -4174,7 +4189,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,    unsigned VFMinVal = VF.getKnownMinValue();    SmallVector<VPInterleaveRecipe *> StoreGroups;    for (auto &R : *VectorLoop->getEntryBasicBlock()) { -    if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount())) +    if (isa<VPCanonicalIVPHIRecipe>(&R))        continue;      if (isa<VPDerivedIVRecipe, VPScalarIVStepsRecipe>(&R) && diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 8c23e78..c6380d3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -32,22 +32,17 @@ bool vputils::onlyScalarValuesUsed(const VPValue 
*Def) {  }  VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { -  VPValue *Expanded = nullptr;    if (auto *E = dyn_cast<SCEVConstant>(Expr)) -    Expanded = Plan.getOrAddLiveIn(E->getValue()); -  else { -    auto *U = dyn_cast<SCEVUnknown>(Expr); -    // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction -    // value. Otherwise the value may be defined in a loop and using it directly -    // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA -    // form. -    if (U && !isa<Instruction>(U->getValue())) { -      Expanded = Plan.getOrAddLiveIn(U->getValue()); -    } else { -      Expanded = new VPExpandSCEVRecipe(Expr); -      Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe()); -    } -  } +    return Plan.getOrAddLiveIn(E->getValue()); +  // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction +  // value. Otherwise the value may be defined in a loop and using it directly +  // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA +  // form. +  auto *U = dyn_cast<SCEVUnknown>(Expr); +  if (U && !isa<Instruction>(U->getValue())) +    return Plan.getOrAddLiveIn(U->getValue()); +  auto *Expanded = new VPExpandSCEVRecipe(Expr); +  Plan.getEntry()->appendRecipe(Expanded);    return Expanded;  }  | 
