Diffstat (limited to 'llvm/include')
-rw-r--r--   llvm/include/llvm/ADT/StringSwitch.h                   |   4
-rw-r--r--   llvm/include/llvm/ADT/bit.h                            |  24
-rw-r--r--   llvm/include/llvm/BinaryFormat/Dwarf.h                 |   3
-rw-r--r--   llvm/include/llvm/CAS/OnDiskDataAllocator.h            |   2
-rw-r--r--   llvm/include/llvm/CAS/OnDiskGraphDB.h                  | 470
-rw-r--r--   llvm/include/llvm/CAS/OnDiskKeyValueDB.h               |  82
-rw-r--r--   llvm/include/llvm/CodeGen/MIRFSDiscriminatorOptions.h  |  22
-rw-r--r--   llvm/include/llvm/ExecutionEngine/Orc/Core.h           |  66
-rw-r--r--   llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h | 622
-rw-r--r--   llvm/include/llvm/Frontend/OpenMP/ClauseT.h            |  25
-rw-r--r--   llvm/include/llvm/Frontend/OpenMP/OMP.td               |   5
-rw-r--r--   llvm/include/llvm/IR/IntrinsicsAArch64.td              |  40
-rw-r--r--   llvm/include/llvm/IR/IntrinsicsAMDGPU.td               |  32
-rw-r--r--   llvm/include/llvm/IR/RuntimeLibcalls.h                 |   2
-rw-r--r--   llvm/include/llvm/Support/MathExtras.h                 |   4
15 files changed, 1309 insertions, 94 deletions
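The StringSwitch.h change below deprecates the four- and five-argument Cases/CasesLower overloads in favor of the initializer-list overload. A minimal migration sketch; the classifyColor call site and its strings are hypothetical and not part of the patch:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Hypothetical call site: map a color name to a small integer code.
static int classifyColor(llvm::StringRef Name) {
  return llvm::StringSwitch<int>(Name)
      // Was: .Cases("red", "crimson", "scarlet", "ruby", 1) -- now deprecated.
      .Cases({"red", "crimson", "scarlet", "ruby"}, 1)
      .Cases({"green", "emerald"}, 2)
      .Default(0);
}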
diff --git a/llvm/include/llvm/ADT/StringSwitch.h b/llvm/include/llvm/ADT/StringSwitch.h index 26d5682..2262b11 100644 --- a/llvm/include/llvm/ADT/StringSwitch.h +++ b/llvm/include/llvm/ADT/StringSwitch.h @@ -98,11 +98,13 @@ public: return CasesImpl({S0, S1, S2}, Value); } + [[deprecated("Pass cases in std::initializer_list instead")]] StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2, StringLiteral S3, T Value) { return CasesImpl({S0, S1, S2, S3}, Value); } + [[deprecated("Pass cases in std::initializer_list instead")]] StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2, StringLiteral S3, StringLiteral S4, T Value) { return CasesImpl({S0, S1, S2, S3, S4}, Value); @@ -179,11 +181,13 @@ public: return CasesLowerImpl({S0, S1, S2}, Value); } + [[deprecated("Pass cases in std::initializer_list instead")]] StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2, StringLiteral S3, T Value) { return CasesLowerImpl({S0, S1, S2, S3}, Value); } + [[deprecated("Pass cases in std::initializer_list instead")]] StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2, StringLiteral S3, StringLiteral S4, T Value) { return CasesLowerImpl({S0, S1, S2, S3, S4}, Value); diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h index 8b60b69..5971b75 100644 --- a/llvm/include/llvm/ADT/bit.h +++ b/llvm/include/llvm/ADT/bit.h @@ -151,7 +151,7 @@ template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>> /// Count the number of set bits in a value. /// Ex. popcount(0xF000F000) = 8 /// Returns 0 if Value is zero. -template <typename T> [[nodiscard]] inline int popcount(T Value) noexcept { +template <typename T> [[nodiscard]] constexpr int popcount(T Value) noexcept { static_assert(std::is_unsigned_v<T>, "T must be an unsigned integer type"); static_assert(sizeof(T) <= 8, "T must be 8 bytes or less"); @@ -178,7 +178,23 @@ template <typename T> [[nodiscard]] inline int popcount(T Value) noexcept { } /// Count number of 0's from the least significant bit to the most -/// stopping at the first 1. +/// stopping at the first 1. +/// +/// A constexpr version of countr_zero. +/// +/// Only unsigned integral types are allowed. +/// +/// Returns std::numeric_limits<T>::digits on an input of 0. +template <typename T> [[nodiscard]] constexpr int countr_zero_constexpr(T Val) { + static_assert(std::is_unsigned_v<T>, + "Only unsigned integral types are allowed."); + // "(Val & -Val) - 1" generates a mask with all bits set up to (but not + // including) the least significant set bit of Val. + return llvm::popcount(static_cast<std::make_unsigned_t<T>>((Val & -Val) - 1)); +} + +/// Count number of 0's from the least significant bit to the most +/// stopping at the first 1. /// /// Only unsigned integral types are allowed. /// @@ -208,9 +224,7 @@ template <typename T> [[nodiscard]] int countr_zero(T Val) { #endif } - // Fallback to popcount. "(Val & -Val) - 1" is a bitmask with all bits below - // the least significant 1 set. 
- return llvm::popcount(static_cast<std::make_unsigned_t<T>>((Val & -Val) - 1)); + return countr_zero_constexpr(Val); } /// Count number of 0's from the most significant bit to the least diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h index 5039a3f..211c026 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.h +++ b/llvm/include/llvm/BinaryFormat/Dwarf.h @@ -1128,6 +1128,9 @@ struct FormParams { uint8_t getDwarfOffsetByteSize() const { return dwarf::getDwarfOffsetByteSize(Format); } + inline uint64_t getDwarfMaxOffset() const { + return (getDwarfOffsetByteSize() == 4) ? UINT32_MAX : UINT64_MAX; + } explicit operator bool() const { return Version && AddrSize; } }; diff --git a/llvm/include/llvm/CAS/OnDiskDataAllocator.h b/llvm/include/llvm/CAS/OnDiskDataAllocator.h index 2809df8..b7099dc 100644 --- a/llvm/include/llvm/CAS/OnDiskDataAllocator.h +++ b/llvm/include/llvm/CAS/OnDiskDataAllocator.h @@ -64,7 +64,7 @@ public: /// \returns the buffer that was allocated at \p create time, with size /// \p UserHeaderSize. - MutableArrayRef<uint8_t> getUserHeader(); + MutableArrayRef<uint8_t> getUserHeader() const; size_t size() const; size_t capacity() const; diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h new file mode 100644 index 0000000..5f0ee0e --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h @@ -0,0 +1,470 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This declares OnDiskGraphDB, an ondisk CAS database with a fixed length +/// hash. This is the class that implements the database storage scheme without +/// exposing the hashing algorithm. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKGRAPHDB_H +#define LLVM_CAS_ONDISKGRAPHDB_H + +#include "llvm/ADT/PointerUnion.h" +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "llvm/CAS/OnDiskTrieRawHashMap.h" + +namespace llvm::cas::ondisk { + +/// Standard 8 byte reference inside OnDiskGraphDB. +class InternalRef { +public: + FileOffset getFileOffset() const { return FileOffset(Data); } + uint64_t getRawData() const { return Data; } + + static InternalRef getFromRawData(uint64_t Data) { return InternalRef(Data); } + static InternalRef getFromOffset(FileOffset Offset) { + return InternalRef(Offset.get()); + } + + friend bool operator==(InternalRef LHS, InternalRef RHS) { + return LHS.Data == RHS.Data; + } + +private: + InternalRef(FileOffset Offset) : Data((uint64_t)Offset.get()) {} + InternalRef(uint64_t Data) : Data(Data) {} + uint64_t Data; +}; + +/// Compact 4 byte reference inside OnDiskGraphDB for smaller references. +class InternalRef4B { +public: + FileOffset getFileOffset() const { return FileOffset(Data); } + uint32_t getRawData() const { return Data; } + + /// Shrink to 4B reference. 
+ static std::optional<InternalRef4B> tryToShrink(InternalRef Ref) { + uint64_t Offset = Ref.getRawData(); + if (Offset > UINT32_MAX) + return std::nullopt; + return InternalRef4B(Offset); + } + + operator InternalRef() const { + return InternalRef::getFromOffset(getFileOffset()); + } + +private: + friend class InternalRef; + InternalRef4B(uint32_t Data) : Data(Data) {} + uint32_t Data; +}; + +/// Array of internal node references. +class InternalRefArrayRef { +public: + size_t size() const { return Size; } + bool empty() const { return !Size; } + + class iterator + : public iterator_facade_base<iterator, std::random_access_iterator_tag, + const InternalRef> { + public: + bool operator==(const iterator &RHS) const { return I == RHS.I; } + InternalRef operator*() const { + if (auto *Ref = dyn_cast<const InternalRef *>(I)) + return *Ref; + return InternalRef(*cast<const InternalRef4B *>(I)); + } + bool operator<(const iterator &RHS) const { + assert(isa<const InternalRef *>(I) == isa<const InternalRef *>(RHS.I)); + if (auto *Ref = dyn_cast<const InternalRef *>(I)) + return Ref < cast<const InternalRef *>(RHS.I); + return cast<const InternalRef4B *>(I) - + cast<const InternalRef4B *>(RHS.I); + } + ptrdiff_t operator-(const iterator &RHS) const { + assert(isa<const InternalRef *>(I) == isa<const InternalRef *>(RHS.I)); + if (auto *Ref = dyn_cast<const InternalRef *>(I)) + return Ref - cast<const InternalRef *>(RHS.I); + return cast<const InternalRef4B *>(I) - + cast<const InternalRef4B *>(RHS.I); + } + iterator &operator+=(ptrdiff_t N) { + if (auto *Ref = dyn_cast<const InternalRef *>(I)) + I = Ref + N; + else + I = cast<const InternalRef4B *>(I) + N; + return *this; + } + iterator &operator-=(ptrdiff_t N) { + if (auto *Ref = dyn_cast<const InternalRef *>(I)) + I = Ref - N; + else + I = cast<const InternalRef4B *>(I) - N; + return *this; + } + InternalRef operator[](ptrdiff_t N) const { return *(this->operator+(N)); } + + iterator() = default; + + uint64_t getOpaqueData() const { return uintptr_t(I.getOpaqueValue()); } + + static iterator fromOpaqueData(uint64_t Opaque) { + return iterator( + PointerUnion<const InternalRef *, + const InternalRef4B *>::getFromOpaqueValue((void *) + Opaque)); + } + + private: + friend class InternalRefArrayRef; + explicit iterator( + PointerUnion<const InternalRef *, const InternalRef4B *> I) + : I(I) {} + PointerUnion<const InternalRef *, const InternalRef4B *> I; + }; + + bool operator==(const InternalRefArrayRef &RHS) const { + return size() == RHS.size() && std::equal(begin(), end(), RHS.begin()); + } + + iterator begin() const { return iterator(Begin); } + iterator end() const { return begin() + Size; } + + /// Array accessor. + InternalRef operator[](ptrdiff_t N) const { return begin()[N]; } + + bool is4B() const { return isa<const InternalRef4B *>(Begin); } + bool is8B() const { return isa<const InternalRef *>(Begin); } + + ArrayRef<uint8_t> getBuffer() const { + if (is4B()) { + auto *B = cast<const InternalRef4B *>(Begin); + return ArrayRef((const uint8_t *)B, sizeof(InternalRef4B) * Size); + } + auto *B = cast<const InternalRef *>(Begin); + return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size); + } + + InternalRefArrayRef(std::nullopt_t = std::nullopt) { + // This is useful so that all the casts in the \p iterator functions can + // operate without needing to check for a null value. 
+ static InternalRef PlaceHolder = InternalRef::getFromRawData(0); + Begin = &PlaceHolder; + } + + InternalRefArrayRef(ArrayRef<InternalRef> Refs) + : Begin(Refs.begin()), Size(Refs.size()) {} + + InternalRefArrayRef(ArrayRef<InternalRef4B> Refs) + : Begin(Refs.begin()), Size(Refs.size()) {} + +private: + PointerUnion<const InternalRef *, const InternalRef4B *> Begin; + size_t Size = 0; +}; + +/// Reference to a node. The node's data may not be stored in the database. +/// An \p ObjectID instance can only be used with the \p OnDiskGraphDB instance +/// it came from. \p ObjectIDs from different \p OnDiskGraphDB instances are not +/// comparable. +class ObjectID { +public: + uint64_t getOpaqueData() const { return Opaque; } + + static ObjectID fromOpaqueData(uint64_t Opaque) { return ObjectID(Opaque); } + + friend bool operator==(const ObjectID &LHS, const ObjectID &RHS) { + return LHS.Opaque == RHS.Opaque; + } + friend bool operator!=(const ObjectID &LHS, const ObjectID &RHS) { + return !(LHS == RHS); + } + +private: + explicit ObjectID(uint64_t Opaque) : Opaque(Opaque) {} + uint64_t Opaque; +}; + +/// Handle for a loaded node object. +class ObjectHandle { +public: + explicit ObjectHandle(uint64_t Opaque) : Opaque(Opaque) {} + uint64_t getOpaqueData() const { return Opaque; } + + static ObjectHandle fromFileOffset(FileOffset Offset); + static ObjectHandle fromMemory(uintptr_t Ptr); + + friend bool operator==(const ObjectHandle &LHS, const ObjectHandle &RHS) { + return LHS.Opaque == RHS.Opaque; + } + friend bool operator!=(const ObjectHandle &LHS, const ObjectHandle &RHS) { + return !(LHS == RHS); + } + +private: + uint64_t Opaque; +}; + +/// Iterator for ObjectID. +class object_refs_iterator + : public iterator_facade_base<object_refs_iterator, + std::random_access_iterator_tag, ObjectID> { +public: + bool operator==(const object_refs_iterator &RHS) const { return I == RHS.I; } + ObjectID operator*() const { + return ObjectID::fromOpaqueData((*I).getRawData()); + } + bool operator<(const object_refs_iterator &RHS) const { return I < RHS.I; } + ptrdiff_t operator-(const object_refs_iterator &RHS) const { + return I - RHS.I; + } + object_refs_iterator &operator+=(ptrdiff_t N) { + I += N; + return *this; + } + object_refs_iterator &operator-=(ptrdiff_t N) { + I -= N; + return *this; + } + ObjectID operator[](ptrdiff_t N) const { return *(this->operator+(N)); } + + object_refs_iterator() = default; + object_refs_iterator(InternalRefArrayRef::iterator I) : I(I) {} + + uint64_t getOpaqueData() const { return I.getOpaqueData(); } + + static object_refs_iterator fromOpaqueData(uint64_t Opaque) { + return InternalRefArrayRef::iterator::fromOpaqueData(Opaque); + } + +private: + InternalRefArrayRef::iterator I; +}; + +using object_refs_range = llvm::iterator_range<object_refs_iterator>; + +/// On-disk CAS nodes database, independent of a particular hashing algorithm. +class OnDiskGraphDB { +public: + /// Associate data & references with a particular object ID. If there is + /// already a record for this object the operation is a no-op. \param ID the + /// object ID to associate the data & references with. \param Refs references + /// \param Data data buffer. + Error store(ObjectID ID, ArrayRef<ObjectID> Refs, ArrayRef<char> Data); + + /// \returns \p nullopt if the object associated with \p Ref does not exist. + Expected<std::optional<ObjectHandle>> load(ObjectID Ref); + + /// \returns the hash bytes digest for the object reference. 
+ ArrayRef<uint8_t> getDigest(ObjectID Ref) const { + // ObjectID should be valid to fetch Digest. + return cantFail(getDigest(getInternalRef(Ref))); + } + + /// Form a reference for the provided hash. The reference can be used as part + /// of a CAS object even if it's not associated with an object yet. + Expected<ObjectID> getReference(ArrayRef<uint8_t> Hash); + + /// Get an existing reference to the object \p Digest. + /// + /// Returns \p nullopt if the object is not stored in this CAS. + std::optional<ObjectID> getExistingReference(ArrayRef<uint8_t> Digest); + + /// Check whether the object associated with \p Ref is stored in the CAS. + /// Note that this function will fault-in according to the policy. + Expected<bool> isMaterialized(ObjectID Ref); + + /// Check whether the object associated with \p Ref is stored in the CAS. + /// Note that this function does not fault-in. + bool containsObject(ObjectID Ref) const { + return containsObject(Ref, /*CheckUpstream=*/true); + } + + /// \returns the data part of the provided object handle. + ArrayRef<char> getObjectData(ObjectHandle Node) const; + + /// \returns the object referenced by the provided object handle. + object_refs_range getObjectRefs(ObjectHandle Node) const { + InternalRefArrayRef Refs = getInternalRefs(Node); + return make_range(Refs.begin(), Refs.end()); + } + + /// \returns Total size of stored objects. + /// + /// NOTE: There's a possibility that the returned size is not including a + /// large object if the process crashed right at the point of inserting it. + size_t getStorageSize() const; + + /// \returns The precentage of space utilization of hard space limits. + /// + /// Return value is an integer between 0 and 100 for percentage. + unsigned getHardStorageLimitUtilization() const; + + void print(raw_ostream &OS) const; + + /// Hashing function type for validation. + using HashingFuncT = function_ref<void( + ArrayRef<ArrayRef<uint8_t>>, ArrayRef<char>, SmallVectorImpl<uint8_t> &)>; + + /// Validate the OnDiskGraphDB. + /// + /// \param Deep if true, rehash all the objects to ensure no data + /// corruption in stored objects, otherwise just validate the structure of + /// CAS database. + /// \param Hasher is the hashing function used for objects inside CAS. + Error validate(bool Deep, HashingFuncT Hasher) const; + + /// How to fault-in nodes if an upstream database is used. + enum class FaultInPolicy { + /// Copy only the requested node. + SingleNode, + /// Copy the the entire graph of a node. + FullTree, + }; + + /// Open the on-disk store from a directory. + /// + /// \param Path directory for the on-disk store. The directory will be created + /// if it doesn't exist. + /// \param HashName Identifier name for the hashing algorithm that is going to + /// be used. + /// \param HashByteSize Size for the object digest hash bytes. + /// \param UpstreamDB Optional on-disk store to be used for faulting-in nodes + /// if they don't exist in the primary store. The upstream store is only used + /// for reading nodes, new nodes are only written to the primary store. + /// \param Policy If \p UpstreamDB is provided, controls how nodes are copied + /// to primary store. This is recorded at creation time and subsequent opens + /// need to pass the same policy otherwise the \p open will fail. 
+ static Expected<std::unique_ptr<OnDiskGraphDB>> + open(StringRef Path, StringRef HashName, unsigned HashByteSize, + std::unique_ptr<OnDiskGraphDB> UpstreamDB = nullptr, + FaultInPolicy Policy = FaultInPolicy::FullTree); + + ~OnDiskGraphDB(); + +private: + /// Forward declaration for a proxy for an ondisk index record. + struct IndexProxy; + + enum class ObjectPresence { + Missing, + InPrimaryDB, + OnlyInUpstreamDB, + }; + + /// Check if object exists and if it is on upstream only. + Expected<ObjectPresence> getObjectPresence(ObjectID Ref, + bool CheckUpstream) const; + + /// \returns true if object can be found in database. + bool containsObject(ObjectID Ref, bool CheckUpstream) const { + auto Presence = getObjectPresence(Ref, CheckUpstream); + if (!Presence) { + consumeError(Presence.takeError()); + return false; + } + switch (*Presence) { + case ObjectPresence::Missing: + return false; + case ObjectPresence::InPrimaryDB: + return true; + case ObjectPresence::OnlyInUpstreamDB: + return true; + } + llvm_unreachable("Unknown ObjectPresence enum"); + } + + /// When \p load is called for a node that doesn't exist, this function tries + /// to load it from the upstream store and copy it to the primary one. + Expected<std::optional<ObjectHandle>> faultInFromUpstream(ObjectID PrimaryID); + + /// Import the entire tree from upstream with \p UpstreamNode as root. + Error importFullTree(ObjectID PrimaryID, ObjectHandle UpstreamNode); + /// Import only the \param UpstreamNode. + Error importSingleNode(ObjectID PrimaryID, ObjectHandle UpstreamNode); + + /// Found the IndexProxy for the hash. + Expected<IndexProxy> indexHash(ArrayRef<uint8_t> Hash); + + /// Get path for creating standalone data file. + void getStandalonePath(StringRef FileSuffix, const IndexProxy &I, + SmallVectorImpl<char> &Path) const; + /// Create a standalone leaf file. + Error createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data); + + /// \name Helper functions for internal data structures. + /// \{ + static InternalRef getInternalRef(ObjectID Ref) { + return InternalRef::getFromRawData(Ref.getOpaqueData()); + } + + static ObjectID getExternalReference(InternalRef Ref) { + return ObjectID::fromOpaqueData(Ref.getRawData()); + } + + static ObjectID getExternalReference(const IndexProxy &I); + + static InternalRef makeInternalRef(FileOffset IndexOffset); + + Expected<ArrayRef<uint8_t>> getDigest(InternalRef Ref) const; + + ArrayRef<uint8_t> getDigest(const IndexProxy &I) const; + + Expected<IndexProxy> getIndexProxyFromRef(InternalRef Ref) const; + + IndexProxy + getIndexProxyFromPointer(OnDiskTrieRawHashMap::ConstOnDiskPtr P) const; + + InternalRefArrayRef getInternalRefs(ObjectHandle Node) const; + /// \} + + /// Get the atomic variable that keeps track of the standalone data storage + /// size. + std::atomic<uint64_t> &standaloneStorageSize() const; + + /// Increase the standalone data size. + void recordStandaloneSizeIncrease(size_t SizeIncrease); + /// Get the standalone data size. + uint64_t getStandaloneStorageSize() const; + + // Private constructor. + OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, + OnDiskDataAllocator DataPool, + std::unique_ptr<OnDiskGraphDB> UpstreamDB, + FaultInPolicy Policy); + + /// Mapping from hash to object reference. + /// + /// Data type is TrieRecord. + OnDiskTrieRawHashMap Index; + + /// Storage for most objects. + /// + /// Data type is DataRecordHandle. + OnDiskDataAllocator DataPool; + + /// A StandaloneDataMap. 
+ void *StandaloneData = nullptr; + + /// Path to the root directory. + std::string RootPath; + + /// Optional on-disk store to be used for faulting-in nodes. + std::unique_ptr<OnDiskGraphDB> UpstreamDB; + + /// The policy used to fault in data from upstream. + FaultInPolicy FIPolicy; +}; + +} // namespace llvm::cas::ondisk + +#endif // LLVM_CAS_ONDISKGRAPHDB_H diff --git a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h new file mode 100644 index 0000000..b762518 --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This declares OnDiskKeyValueDB, a key value storage database of fixed size +/// key and value. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKKEYVALUEDB_H +#define LLVM_CAS_ONDISKKEYVALUEDB_H + +#include "llvm/CAS/OnDiskTrieRawHashMap.h" + +namespace llvm::cas::ondisk { + +/// An on-disk key-value data store with the following properties: +/// * Keys are fixed length binary hashes with expected normal distribution. +/// * Values are buffers of the same size, specified at creation time. +/// * The value of a key cannot be changed once it is set. +/// * The value buffers returned from a key lookup have 8-byte alignment. +class OnDiskKeyValueDB { +public: + /// Associate a value with a key. + /// + /// \param Key the hash bytes for the key + /// \param Value the value bytes, same size as \p ValueSize parameter of + /// \p open call. + /// + /// \returns the value associated with the \p Key. It may be different than + /// \p Value if another value is already associated with this key. + Expected<ArrayRef<char>> put(ArrayRef<uint8_t> Key, ArrayRef<char> Value); + + /// \returns the value associated with the \p Key, or \p std::nullopt if the + /// key does not exist. + Expected<std::optional<ArrayRef<char>>> get(ArrayRef<uint8_t> Key); + + /// \returns Total size of stored data. + size_t getStorageSize() const { return Cache.size(); } + + /// \returns The precentage of space utilization of hard space limits. + /// + /// Return value is an integer between 0 and 100 for percentage. + unsigned getHardStorageLimitUtilization() const { + return Cache.size() * 100ULL / Cache.capacity(); + } + + /// Open the on-disk store from a directory. + /// + /// \param Path directory for the on-disk store. The directory will be created + /// if it doesn't exist. + /// \param HashName Identifier name for the hashing algorithm that is going to + /// be used. + /// \param KeySize Size for the key hash bytes. + /// \param ValueName Identifier name for the values. + /// \param ValueSize Size for the value bytes. + static Expected<std::unique_ptr<OnDiskKeyValueDB>> + open(StringRef Path, StringRef HashName, unsigned KeySize, + StringRef ValueName, size_t ValueSize); + + using CheckValueT = + function_ref<Error(FileOffset Offset, ArrayRef<char> Data)>; + /// Validate the storage with a callback \p CheckValue to check the stored + /// value. 
+ Error validate(CheckValueT CheckValue) const; + +private: + OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache) + : ValueSize(ValueSize), Cache(std::move(Cache)) {} + + const size_t ValueSize; + OnDiskTrieRawHashMap Cache; +}; + +} // namespace llvm::cas::ondisk + +#endif // LLVM_CAS_ONDISKKEYVALUEDB_H diff --git a/llvm/include/llvm/CodeGen/MIRFSDiscriminatorOptions.h b/llvm/include/llvm/CodeGen/MIRFSDiscriminatorOptions.h new file mode 100644 index 0000000..672a6b3 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MIRFSDiscriminatorOptions.h @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Command line options for MIR Flow Sensitive discriminators. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MIRFSDISCRIMINATOR_OPTIONS_H +#define LLVM_CODEGEN_MIRFSDISCRIMINATOR_OPTIONS_H + +#include "llvm/Support/CommandLine.h" + +namespace llvm { +extern cl::opt<bool> ImprovedFSDiscriminator; +} // namespace llvm + +#endif // LLVM_CODEGEN_MIRFSDISCRIMINATOR_OPTIONS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index f407b56..8613ddd 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -26,6 +26,7 @@ #include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include "llvm/ExecutionEngine/Orc/TaskDispatch.h" +#include "llvm/ExecutionEngine/Orc/WaitingOnGraph.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ExtensibleRTTI.h" @@ -49,6 +50,9 @@ class InProgressLookupState; enum class SymbolState : uint8_t; +using WaitingOnGraph = + detail::WaitingOnGraph<JITDylib *, NonOwningSymbolStringPtr>; + using ResourceTrackerSP = IntrusiveRefCntPtr<ResourceTracker>; using JITDylibSP = IntrusiveRefCntPtr<JITDylib>; @@ -1131,20 +1135,6 @@ private: using UnmaterializedInfosList = std::vector<std::shared_ptr<UnmaterializedInfo>>; - struct EmissionDepUnit { - EmissionDepUnit(JITDylib &JD) : JD(&JD) {} - - JITDylib *JD = nullptr; - DenseMap<NonOwningSymbolStringPtr, JITSymbolFlags> Symbols; - DenseMap<JITDylib *, DenseSet<NonOwningSymbolStringPtr>> Dependencies; - }; - - struct EmissionDepUnitInfo { - std::shared_ptr<EmissionDepUnit> EDU; - DenseSet<EmissionDepUnit *> IntraEmitUsers; - DenseMap<JITDylib *, DenseSet<NonOwningSymbolStringPtr>> NewDeps; - }; - // Information about not-yet-ready symbol. // * DefiningEDU will point to the EmissionDepUnit that defines the symbol. 
// * DependantEDUs will hold pointers to any EmissionDepUnits currently @@ -1154,9 +1144,6 @@ private: struct MaterializingInfo { friend class ExecutionSession; - std::shared_ptr<EmissionDepUnit> DefiningEDU; - DenseSet<EmissionDepUnit *> DependantEDUs; - LLVM_ABI void addQuery(std::shared_ptr<AsynchronousSymbolQuery> Q); LLVM_ABI void removeQuery(const AsynchronousSymbolQuery &Q); LLVM_ABI AsynchronousSymbolQueryList @@ -1778,30 +1765,26 @@ private: LLVM_ABI Error OL_notifyResolved(MaterializationResponsibility &MR, const SymbolMap &Symbols); - using EDUInfosMap = - DenseMap<JITDylib::EmissionDepUnit *, JITDylib::EmissionDepUnitInfo>; - - template <typename HandleNewDepFn> - void propagateExtraEmitDeps(std::deque<JITDylib::EmissionDepUnit *> Worklist, - EDUInfosMap &EDUInfos, - HandleNewDepFn HandleNewDep); - EDUInfosMap simplifyDepGroups(MaterializationResponsibility &MR, - ArrayRef<SymbolDependenceGroup> EmittedDeps); - void IL_makeEDUReady(std::shared_ptr<JITDylib::EmissionDepUnit> EDU, - JITDylib::AsynchronousSymbolQuerySet &Queries); - void IL_makeEDUEmitted(std::shared_ptr<JITDylib::EmissionDepUnit> EDU, - JITDylib::AsynchronousSymbolQuerySet &Queries); - bool IL_removeEDUDependence(JITDylib::EmissionDepUnit &EDU, JITDylib &DepJD, - NonOwningSymbolStringPtr DepSym, - EDUInfosMap &EDUInfos); - - static Error makeJDClosedError(JITDylib::EmissionDepUnit &EDU, - JITDylib &ClosedJD); - static Error makeUnsatisfiedDepsError(JITDylib::EmissionDepUnit &EDU, - JITDylib &BadJD, SymbolNameSet BadDeps); - - Expected<JITDylib::AsynchronousSymbolQuerySet> - IL_emit(MaterializationResponsibility &MR, EDUInfosMap EDUInfos); + // FIXME: We should be able to derive FailedSymsForQuery from each query once + // we fix how the detach operation works. + struct EmitQueries { + JITDylib::AsynchronousSymbolQuerySet Updated; + JITDylib::AsynchronousSymbolQuerySet Failed; + DenseMap<AsynchronousSymbolQuery *, std::shared_ptr<SymbolDependenceMap>> + FailedSymsForQuery; + }; + + WaitingOnGraph::ExternalState + IL_getSymbolState(JITDylib *JD, NonOwningSymbolStringPtr Name); + + template <typename UpdateSymbolFn, typename UpdateQueryFn> + void IL_collectQueries(JITDylib::AsynchronousSymbolQuerySet &Qs, + WaitingOnGraph::ContainerElementsMap &QualifiedSymbols, + UpdateSymbolFn &&UpdateSymbol, + UpdateQueryFn &&UpdateQuery); + + Expected<EmitQueries> IL_emit(MaterializationResponsibility &MR, + WaitingOnGraph::SimplifyResult SR); LLVM_ABI Error OL_notifyEmitted(MaterializationResponsibility &MR, ArrayRef<SymbolDependenceGroup> EmittedDeps); @@ -1830,6 +1813,7 @@ private: std::vector<ResourceManager *> ResourceManagers; std::vector<JITDylibSP> JDs; + WaitingOnGraph G; // FIXME: Remove this (and runOutstandingMUs) once the linking layer works // with callbacks from asynchronous queries. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h b/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h new file mode 100644 index 0000000..a5b5333 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h @@ -0,0 +1,622 @@ +//===------ WaitingOnGraph.h - ORC symbol dependence graph ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines WaitingOnGraph and related utilities. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_EXECUTIONENGINE_ORC_WAITINGONGRAPH_H +#define LLVM_EXECUTIONENGINE_ORC_WAITINGONGRAPH_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" + +#include <algorithm> + +namespace llvm::orc::detail { + +class WaitingOnGraphTest; + +/// WaitingOnGraph class template. +/// +/// This type is intended to provide efficient dependence tracking for Symbols +/// in an ORC program. +/// +/// WaitingOnGraph models a directed graph with four partitions: +/// 1. Not-yet-emitted nodes: Nodes identified as waited-on in an emit +/// operation. +/// 2. Emitted nodes: Nodes emitted and waiting on some non-empty set of +/// other nodes. +/// 3. Ready nodes: Nodes emitted and not waiting on any other nodes +/// (either because they weren't waiting on any nodes when they were +/// emitted, or because all transitively waited-on nodes have since +/// been emitted). +/// 4. Failed nodes: Nodes that have been marked as failed-to-emit, and +/// nodes that were found to transitively wait-on some failed node. +/// +/// Nodes are added to the graph by *emit* and *fail* operations. +/// +/// The *emit* operation takes a bipartite *local dependence graph* as an +/// argument and returns... +/// a. the set of nodes (both existing and newly added from the local +/// dependence graph) whose waiting-on set is the empty set, and... +/// b. the set of newly added nodes that are found to depend on failed +/// nodes. +/// +/// The *fail* operation takes a set of failed nodes and returns the set of +/// Emitted nodes that were waiting on the failed nodes. +/// +/// The concrete representation adopts several approaches for efficiency: +/// +/// 1. Only *Emitted* and *Not-yet-emitted* nodes are represented explicitly. +/// *Ready* and *Failed* nodes are represented by the values returned by the +/// GetExternalStateFn argument to *emit*. +/// +/// 2. Labels are (*Container*, *Element*) pairs that are intended to represent +/// ORC symbols (ORC uses types Container = JITDylib, +/// Element = NonOwningSymbolStringPtr). The internal representation of the +/// graph is optimized on the assumption that there are many more Elements +/// (symbol names) than Containers (JITDylibs) used to construct the labels. +/// (Consider for example the common case where most JIT'd code is placed in +/// a single "main" JITDylib). +/// +/// 3. The data structure stores *SuperNodes* which have multiple labels. This +/// reduces the number of nodes and edges in the graph in the common case +/// where many JIT symbols have the same set of dependencies. SuperNodes are +/// coalesced when their dependence sets become equal. +/// +/// 4. The *simplify* method can be applied to an initial *local dependence +/// graph* (as a list of SuperNodes) to eliminate any internal dependence +/// relationships that would have to be propagated internally by *emit*. +/// Access to the WaitingOnGraph is assumed to be guarded by a mutex (ORC +/// will access it from multiple threads) so this allows some pre-processing +/// to be performed outside the mutex. 
+template <typename ContainerIdT, typename ElementIdT> class WaitingOnGraph { + friend class WaitingOnGraphTest; + +public: + using ContainerId = ContainerIdT; + using ElementId = ElementIdT; + using ElementSet = DenseSet<ElementId>; + using ContainerElementsMap = DenseMap<ContainerId, ElementSet>; + + class SuperNode { + friend class WaitingOnGraph; + friend class WaitingOnGraphTest; + + public: + SuperNode(ContainerElementsMap Defs, ContainerElementsMap Deps) + : Defs(std::move(Defs)), Deps(std::move(Deps)) {} + ContainerElementsMap &defs() { return Defs; } + const ContainerElementsMap &defs() const { return Defs; } + ContainerElementsMap &deps() { return Deps; } + const ContainerElementsMap &deps() const { return Deps; } + + private: + ContainerElementsMap Defs; + ContainerElementsMap Deps; + }; + +private: + using ElemToSuperNodeMap = + DenseMap<ContainerId, DenseMap<ElementId, SuperNode *>>; + + using SuperNodeDepsMap = DenseMap<SuperNode *, DenseSet<SuperNode *>>; + + class Coalescer { + public: + std::unique_ptr<SuperNode> addOrCreateSuperNode(ContainerElementsMap Defs, + ContainerElementsMap Deps) { + auto H = getHash(Deps); + if (auto *ExistingSN = findCanonicalSuperNode(H, Deps)) { + for (auto &[Container, Elems] : Defs) { + auto &DstCElems = ExistingSN->Defs[Container]; + [[maybe_unused]] size_t ExpectedSize = + DstCElems.size() + Elems.size(); + DstCElems.insert(Elems.begin(), Elems.end()); + assert(DstCElems.size() == ExpectedSize); + } + return nullptr; + } + + auto NewSN = + std::make_unique<SuperNode>(std::move(Defs), std::move(Deps)); + CanonicalSNs[H].push_back(NewSN.get()); + return NewSN; + } + + void coalesce(std::vector<std::unique_ptr<SuperNode>> &SNs, + ElemToSuperNodeMap &ElemToSN) { + for (size_t I = 0; I != SNs.size();) { + auto &SN = SNs[I]; + auto H = getHash(SN->Deps); + if (auto *CanonicalSN = findCanonicalSuperNode(H, SN->Deps)) { + for (auto &[Container, Elems] : SN->Defs) { + CanonicalSN->Defs[Container].insert(Elems.begin(), Elems.end()); + auto &ContainerElemToSN = ElemToSN[Container]; + for (auto &Elem : Elems) + ContainerElemToSN[Elem] = CanonicalSN; + } + std::swap(SN, SNs.back()); + SNs.pop_back(); + } else { + CanonicalSNs[H].push_back(SN.get()); + ++I; + } + } + } + + template <typename Pred> void remove(Pred &&Remove) { + for (auto &[Hash, SNs] : CanonicalSNs) { + bool Found = false; + for (size_t I = 0; I != SNs.size(); ++I) { + if (Remove(SNs[I])) { + std::swap(SNs[I], SNs.back()); + SNs.pop_back(); + Found = true; + break; + } + } + if (Found) { + if (SNs.empty()) + CanonicalSNs.erase(Hash); + break; + } + } + } + + private: + hash_code getHash(const ContainerElementsMap &M) { + SmallVector<ContainerId> SortedContainers; + SortedContainers.reserve(M.size()); + for (auto &[Container, Elems] : M) + SortedContainers.push_back(Container); + llvm::sort(SortedContainers); + hash_code Hash(0); + for (auto &Container : SortedContainers) { + auto &ContainerElems = M.at(Container); + SmallVector<ElementId> SortedElems(ContainerElems.begin(), + ContainerElems.end()); + llvm::sort(SortedElems); + Hash = hash_combine( + Hash, Container, + hash_combine_range(SortedElems.begin(), SortedElems.end())); + } + return Hash; + } + + SuperNode *findCanonicalSuperNode(hash_code H, + const ContainerElementsMap &M) { + for (auto *SN : CanonicalSNs[H]) + if (SN->Deps == M) + return SN; + return nullptr; + } + + DenseMap<hash_code, SmallVector<SuperNode *>> CanonicalSNs; + }; + +public: + /// Build SuperNodes from (definition-set, dependence-set) pairs. 
+ /// + /// Coalesces definition-sets with identical dependence-sets. + class SuperNodeBuilder { + public: + void add(ContainerElementsMap Defs, ContainerElementsMap Deps) { + if (Defs.empty()) + return; + // Remove any self-reference. + SmallVector<ContainerId> ToRemove; + for (auto &[Container, Elems] : Defs) { + assert(!Elems.empty() && "Defs for container must not be empty"); + auto I = Deps.find(Container); + if (I == Deps.end()) + continue; + auto &DepsForContainer = I->second; + for (auto &Elem : Elems) + DepsForContainer.erase(Elem); + if (DepsForContainer.empty()) + ToRemove.push_back(Container); + } + for (auto &Container : ToRemove) + Deps.erase(Container); + if (auto SN = C.addOrCreateSuperNode(std::move(Defs), std::move(Deps))) + SNs.push_back(std::move(SN)); + } + std::vector<std::unique_ptr<SuperNode>> takeSuperNodes() { + return std::move(SNs); + } + + private: + Coalescer C; + std::vector<std::unique_ptr<SuperNode>> SNs; + }; + + class SimplifyResult { + friend class WaitingOnGraph; + friend class WaitingOnGraphTest; + + public: + const std::vector<std::unique_ptr<SuperNode>> &superNodes() const { + return SNs; + } + + private: + SimplifyResult(std::vector<std::unique_ptr<SuperNode>> SNs, + ElemToSuperNodeMap ElemToSN) + : SNs(std::move(SNs)), ElemToSN(std::move(ElemToSN)) {} + std::vector<std::unique_ptr<SuperNode>> SNs; + ElemToSuperNodeMap ElemToSN; + }; + + /// Preprocess a list of SuperNodes to remove all intra-SN dependencies. + static SimplifyResult simplify(std::vector<std::unique_ptr<SuperNode>> SNs) { + // Build ElemToSN map. + ElemToSuperNodeMap ElemToSN; + for (auto &SN : SNs) { + for (auto &[Container, Elements] : SN->Defs) { + auto &ContainerElemToSN = ElemToSN[Container]; + for (auto &E : Elements) + ContainerElemToSN[E] = SN.get(); + } + } + + SuperNodeDepsMap SuperNodeDeps; + hoistDeps(SuperNodeDeps, SNs, ElemToSN); + propagateSuperNodeDeps(SuperNodeDeps); + sinkDeps(SNs, SuperNodeDeps); + + // Pre-coalesce nodes. + Coalescer().coalesce(SNs, ElemToSN); + + return {std::move(SNs), std::move(ElemToSN)}; + } + + struct EmitResult { + std::vector<std::unique_ptr<SuperNode>> Ready; + std::vector<std::unique_ptr<SuperNode>> Failed; + }; + + enum class ExternalState { None, Ready, Failed }; + + /// Add the given SuperNodes to the graph, returning any SuperNodes that + /// move to the Ready or Failed states as a result. + /// The GetExternalState function is used to represent SuperNodes that have + /// already become Ready or Failed (since such nodes are not explicitly + /// represented in the graph). + template <typename GetExternalStateFn> + EmitResult emit(SimplifyResult SR, GetExternalStateFn &&GetExternalState) { + auto NewSNs = std::move(SR.SNs); + auto ElemToNewSN = std::move(SR.ElemToSN); + + // First process any dependencies on nodes with external state. + auto FailedSNs = processExternalDeps(NewSNs, GetExternalState); + + // Collect the PendingSNs whose dep sets are about to be modified. 
+ std::vector<std::unique_ptr<SuperNode>> ModifiedPendingSNs; + for (size_t I = 0; I != PendingSNs.size();) { + auto &SN = PendingSNs[I]; + bool Remove = false; + for (auto &[Container, Elems] : SN->Deps) { + auto I = ElemToNewSN.find(Container); + if (I == ElemToNewSN.end()) + continue; + for (auto Elem : Elems) { + if (I->second.contains(Elem)) { + Remove = true; + break; + } + } + if (Remove) + break; + } + if (Remove) { + ModifiedPendingSNs.push_back(std::move(SN)); + std::swap(SN, PendingSNs.back()); + PendingSNs.pop_back(); + } else + ++I; + } + + // Remove cycles from the graphs. + SuperNodeDepsMap SuperNodeDeps; + hoistDeps(SuperNodeDeps, ModifiedPendingSNs, ElemToNewSN); + + CoalesceToPendingSNs.remove( + [&](SuperNode *SN) { return SuperNodeDeps.count(SN); }); + + hoistDeps(SuperNodeDeps, NewSNs, ElemToPendingSN); + propagateSuperNodeDeps(SuperNodeDeps); + sinkDeps(NewSNs, SuperNodeDeps); + sinkDeps(ModifiedPendingSNs, SuperNodeDeps); + + // Process supernodes. Pending first, since we'll update PendingSNs when we + // incorporate NewSNs. + std::vector<std::unique_ptr<SuperNode>> ReadyNodes, FailedNodes; + processReadyOrFailed(ModifiedPendingSNs, ReadyNodes, FailedNodes, + SuperNodeDeps, ElemToPendingSN, FailedSNs); + processReadyOrFailed(NewSNs, ReadyNodes, FailedNodes, SuperNodeDeps, + ElemToNewSN, FailedSNs); + + CoalesceToPendingSNs.coalesce(ModifiedPendingSNs, ElemToPendingSN); + CoalesceToPendingSNs.coalesce(NewSNs, ElemToPendingSN); + + // Integrate remaining ModifiedPendingSNs and NewSNs into PendingSNs. + for (auto &SN : ModifiedPendingSNs) + PendingSNs.push_back(std::move(SN)); + + // Update ElemToPendingSN for the remaining elements. + for (auto &SN : NewSNs) { + for (auto &[Container, Elems] : SN->Defs) { + auto &Row = ElemToPendingSN[Container]; + for (auto &Elem : Elems) + Row[Elem] = SN.get(); + } + PendingSNs.push_back(std::move(SN)); + } + + return {std::move(ReadyNodes), std::move(FailedNodes)}; + } + + /// Identify the given symbols as Failed. + /// The elements of the Failed map will not be included in the returned + /// result, so clients should take whatever actions are needed to mark + /// this as failed in their external representation. 
+ std::vector<std::unique_ptr<SuperNode>> + fail(const ContainerElementsMap &Failed) { + std::vector<std::unique_ptr<SuperNode>> FailedSNs; + + for (size_t I = 0; I != PendingSNs.size();) { + auto &PendingSN = PendingSNs[I]; + bool FailPendingSN = false; + for (auto &[Container, Elems] : PendingSN->Deps) { + if (FailPendingSN) + break; + auto I = Failed.find(Container); + if (I == Failed.end()) + continue; + for (auto &Elem : Elems) { + if (I->second.count(Elem)) { + FailPendingSN = true; + break; + } + } + } + if (FailPendingSN) { + FailedSNs.push_back(std::move(PendingSN)); + PendingSN = std::move(PendingSNs.back()); + PendingSNs.pop_back(); + } else + ++I; + } + + for (auto &SN : FailedSNs) { + CoalesceToPendingSNs.remove( + [&](SuperNode *SNC) { return SNC == SN.get(); }); + for (auto &[Container, Elems] : SN->Defs) { + assert(ElemToPendingSN.count(Container)); + auto &CElems = ElemToPendingSN[Container]; + for (auto &Elem : Elems) + CElems.erase(Elem); + if (CElems.empty()) + ElemToPendingSN.erase(Container); + } + } + + return FailedSNs; + } + + bool validate(raw_ostream &Log) { + bool AllGood = true; + auto ErrLog = [&]() -> raw_ostream & { + AllGood = false; + return Log; + }; + + size_t DefCount = 0; + for (auto &PendingSN : PendingSNs) { + if (PendingSN->Deps.empty()) + ErrLog() << "Pending SN " << PendingSN.get() << " has empty dep set.\n"; + else { + bool BadElem = false; + for (auto &[Container, Elems] : PendingSN->Deps) { + auto I = ElemToPendingSN.find(Container); + if (I == ElemToPendingSN.end()) + continue; + if (Elems.empty()) + ErrLog() << "Pending SN " << PendingSN.get() + << " has dependence map entry for " << Container + << " with empty element set.\n"; + for (auto &Elem : Elems) { + if (I->second.count(Elem)) { + ErrLog() << "Pending SN " << PendingSN.get() + << " has dependence on emitted element ( " << Container + << ", " << Elem << ")\n"; + BadElem = true; + break; + } + } + if (BadElem) + break; + } + } + + for (auto &[Container, Elems] : PendingSN->Defs) { + if (Elems.empty()) + ErrLog() << "Pending SN " << PendingSN.get() + << " has def map entry for " << Container + << " with empty element set.\n"; + DefCount += Elems.size(); + auto I = ElemToPendingSN.find(Container); + if (I == ElemToPendingSN.end()) + ErrLog() << "Pending SN " << PendingSN.get() << " has " + << Elems.size() << " defs in container " << Container + << " not covered by ElemsToPendingSN.\n"; + else { + for (auto &Elem : Elems) { + auto J = I->second.find(Elem); + if (J == I->second.end()) + ErrLog() << "Pending SN " << PendingSN.get() << " has element (" + << Container << ", " << Elem + << ") not covered by ElemsToPendingSN.\n"; + else if (J->second != PendingSN.get()) + ErrLog() << "ElemToPendingSN value invalid for (" << Container + << ", " << Elem << ")\n"; + } + } + } + } + + size_t DefCount2 = 0; + for (auto &[Container, Elems] : ElemToPendingSN) + DefCount2 += Elems.size(); + + assert(DefCount2 >= DefCount); + if (DefCount2 != DefCount) + ErrLog() << "ElemToPendingSN contains extra elements.\n"; + + return AllGood; + } + +private: + // Replace individual dependencies with supernode dependencies. + // + // For all dependencies in SNs, if the corresponding node is defined in + // ElemToSN then remove the individual dependency and add the record the + // dependency on the corresponding supernode in SuperNodeDeps. 
+ static void hoistDeps(SuperNodeDepsMap &SuperNodeDeps, + std::vector<std::unique_ptr<SuperNode>> &SNs, + ElemToSuperNodeMap &ElemToSN) { + for (auto &SN : SNs) { + auto &SNDeps = SuperNodeDeps[SN.get()]; + for (auto &[DefContainer, DefElems] : ElemToSN) { + auto I = SN->Deps.find(DefContainer); + if (I == SN->Deps.end()) + continue; + for (auto &[DefElem, DefSN] : DefElems) + if (I->second.erase(DefElem)) + SNDeps.insert(DefSN); + if (I->second.empty()) + SN->Deps.erase(I); + } + } + } + + // Compute transitive closure of deps for each node. + static void propagateSuperNodeDeps(SuperNodeDepsMap &SuperNodeDeps) { + for (auto &[SN, Deps] : SuperNodeDeps) { + DenseSet<SuperNode *> Reachable({SN}); + SmallVector<SuperNode *> Worklist(Deps.begin(), Deps.end()); + + while (!Worklist.empty()) { + auto *DepSN = Worklist.pop_back_val(); + if (!Reachable.insert(DepSN).second) + continue; + auto I = SuperNodeDeps.find(DepSN); + if (I == SuperNodeDeps.end()) + continue; + for (auto *DepSNDep : I->second) + Worklist.push_back(DepSNDep); + } + + Deps = std::move(Reachable); + } + } + + // Sink SuperNode dependencies back to dependencies on individual nodes. + static void sinkDeps(std::vector<std::unique_ptr<SuperNode>> &SNs, + SuperNodeDepsMap &SuperNodeDeps) { + for (auto &SN : SNs) { + auto I = SuperNodeDeps.find(SN.get()); + if (I == SuperNodeDeps.end()) + continue; + + for (auto *DepSN : I->second) + for (auto &[Container, Elems] : DepSN->Deps) + SN->Deps[Container].insert(Elems.begin(), Elems.end()); + } + } + + template <typename GetExternalStateFn> + static std::vector<SuperNode *> + processExternalDeps(std::vector<std::unique_ptr<SuperNode>> &SNs, + GetExternalStateFn &GetExternalState) { + std::vector<SuperNode *> FailedSNs; + for (auto &SN : SNs) { + bool SNHasError = false; + SmallVector<ContainerId> ContainersToRemove; + for (auto &[Container, Elems] : SN->Deps) { + SmallVector<ElementId> ElemToRemove; + for (auto &Elem : Elems) { + switch (GetExternalState(Container, Elem)) { + case ExternalState::None: + break; + case ExternalState::Ready: + ElemToRemove.push_back(Elem); + break; + case ExternalState::Failed: + ElemToRemove.push_back(Elem); + SNHasError = true; + break; + } + } + for (auto &Elem : ElemToRemove) + Elems.erase(Elem); + if (Elems.empty()) + ContainersToRemove.push_back(Container); + } + for (auto &Container : ContainersToRemove) + SN->Deps.erase(Container); + if (SNHasError) + FailedSNs.push_back(SN.get()); + } + + return FailedSNs; + } + + void processReadyOrFailed(std::vector<std::unique_ptr<SuperNode>> &SNs, + std::vector<std::unique_ptr<SuperNode>> &Ready, + std::vector<std::unique_ptr<SuperNode>> &Failed, + SuperNodeDepsMap &SuperNodeDeps, + ElemToSuperNodeMap &ElemToSNs, + std::vector<SuperNode *> FailedSNs) { + for (size_t I = 0; I != SNs.size();) { + auto &SN = SNs[I]; + + bool SNFailed = false; + assert(SuperNodeDeps.count(SN.get())); + auto &SNSuperNodeDeps = SuperNodeDeps[SN.get()]; + for (auto *FailedSN : FailedSNs) { + if (FailedSN == SN.get() || SNSuperNodeDeps.count(FailedSN)) { + SNFailed = true; + break; + } + } + + bool SNReady = SN->Deps.empty(); + + if (SNReady || SNFailed) { + auto &NodeList = SNFailed ? 
Failed : Ready; + NodeList.push_back(std::move(SN)); + std::swap(SN, SNs.back()); + SNs.pop_back(); + } else + ++I; + } + } + + std::vector<std::unique_ptr<SuperNode>> PendingSNs; + ElemToSuperNodeMap ElemToPendingSN; + Coalescer CoalesceToPendingSNs; +}; + +} // namespace llvm::orc::detail + +#endif // LLVM_EXECUTIONENGINE_ORC_WAITINGONGRAPH_H diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index 1a01fa6..87b9520 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -541,6 +541,14 @@ struct DeviceT { std::tuple<OPT(DeviceModifier), DeviceDescription> t; }; +// [6.0:362] +template <typename T, typename I, typename E> // +struct DeviceSafesyncT { + using Requires = E; + using WrapperTrait = std::true_type; + OPT(Requires) v; +}; + // V5.2: [13.1] `device_type` clause template <typename T, typename I, typename E> // struct DeviceTypeT { @@ -1332,14 +1340,15 @@ using WrapperClausesT = std::variant< AtomicDefaultMemOrderT<T, I, E>, AtT<T, I, E>, BindT<T, I, E>, CollapseT<T, I, E>, ContainsT<T, I, E>, CopyinT<T, I, E>, CopyprivateT<T, I, E>, DefaultT<T, I, E>, DestroyT<T, I, E>, - DetachT<T, I, E>, DeviceTypeT<T, I, E>, DynamicAllocatorsT<T, I, E>, - EnterT<T, I, E>, ExclusiveT<T, I, E>, FailT<T, I, E>, FilterT<T, I, E>, - FinalT<T, I, E>, FirstprivateT<T, I, E>, HasDeviceAddrT<T, I, E>, - HintT<T, I, E>, HoldsT<T, I, E>, InclusiveT<T, I, E>, IndirectT<T, I, E>, - InitializerT<T, I, E>, IsDevicePtrT<T, I, E>, LinkT<T, I, E>, - MessageT<T, I, E>, NocontextT<T, I, E>, NontemporalT<T, I, E>, - NovariantsT<T, I, E>, NumTeamsT<T, I, E>, NumThreadsT<T, I, E>, - OrderedT<T, I, E>, PartialT<T, I, E>, PriorityT<T, I, E>, PrivateT<T, I, E>, + DetachT<T, I, E>, DeviceSafesyncT<T, I, E>, DeviceTypeT<T, I, E>, + DynamicAllocatorsT<T, I, E>, EnterT<T, I, E>, ExclusiveT<T, I, E>, + FailT<T, I, E>, FilterT<T, I, E>, FinalT<T, I, E>, FirstprivateT<T, I, E>, + HasDeviceAddrT<T, I, E>, HintT<T, I, E>, HoldsT<T, I, E>, + InclusiveT<T, I, E>, IndirectT<T, I, E>, InitializerT<T, I, E>, + IsDevicePtrT<T, I, E>, LinkT<T, I, E>, MessageT<T, I, E>, + NocontextT<T, I, E>, NontemporalT<T, I, E>, NovariantsT<T, I, E>, + NumTeamsT<T, I, E>, NumThreadsT<T, I, E>, OrderedT<T, I, E>, + PartialT<T, I, E>, PriorityT<T, I, E>, PrivateT<T, I, E>, ProcBindT<T, I, E>, ReverseOffloadT<T, I, E>, SafelenT<T, I, E>, SelfMapsT<T, I, E>, SeverityT<T, I, E>, SharedT<T, I, E>, SimdlenT<T, I, E>, SizesT<T, I, E>, PermutationT<T, I, E>, ThreadLimitT<T, I, E>, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index edcf7a9..61a1a05 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -163,6 +163,10 @@ def OMPC_Device : Clause<[Spelling<"device">]> { let clangClass = "OMPDeviceClause"; let flangClass = "OmpDeviceClause"; } +def OMPC_DeviceSafesync : Clause<[Spelling<"device_safesync">]> { + let flangClass = "OmpDeviceSafesyncClause"; + let isValueOptional = true; +} def OMPC_DeviceType : Clause<[Spelling<"device_type">]> { let flangClass = "OmpDeviceTypeClause"; } @@ -1018,6 +1022,7 @@ def OMP_Requires : Directive<[Spelling<"requires">]> { let allowedOnceClauses = [ VersionedClause<OMPC_UnifiedAddress>, VersionedClause<OMPC_UnifiedSharedMemory>, + VersionedClause<OMPC_DeviceSafesync, 60>, // OpenMP 5.2 Spec: If an implementation is not supporting a requirement // (reverse offload in this case) then it should give compile-time error // 
termination. diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index b0269ee..b81edc3 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -522,7 +522,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_neon_vcmla_rot90 : AdvSIMD_3VectorArg_Intrinsic; def int_aarch64_neon_vcmla_rot180 : AdvSIMD_3VectorArg_Intrinsic; def int_aarch64_neon_vcmla_rot270 : AdvSIMD_3VectorArg_Intrinsic; - + // FP8 fscale def int_aarch64_neon_fp8_fscale : DefaultAttrsIntrinsic< [llvm_anyvector_ty], @@ -1467,7 +1467,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMSubdivide2VectorType<0>, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>; - + class SVE2_1VectorArgIndexed_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -1482,7 +1482,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>; - + class SVE2_1VectorArg_Pred_Intrinsic : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [llvm_anyvector_ty], @@ -1492,7 +1492,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; - + class SVE2_Pred_1VectorArgIndexed_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -3353,11 +3353,11 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_nxv8bf16_ty], [llvm_nxv4f32_ty, llvm_nxv4f32_ty], [IntrNoMem]>; - + class SVE2_CVT_WIDENING_VG2_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [LLVMSubdivide2VectorType<0>], [IntrNoMem]>; - + class SVE2_CVT_VG4_SINGLE_Intrinsic : DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>], @@ -3740,7 +3740,7 @@ let TargetPrefix = "aarch64" in { llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrInaccessibleMemOnly, IntrWriteMem]>; - + class SME2_Add_Sub_Write_VG4_Multi_Multi_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, @@ -3887,7 +3887,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_luti4_lane_zt : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>; - + // Lookup table expand two registers // def int_aarch64_sme_luti2_lane_zt_x2 @@ -3896,7 +3896,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_luti4_lane_zt_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>; - + // // Lookup table expand four registers // @@ -3914,7 +3914,7 @@ let TargetPrefix = "aarch64" in { [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrReadMem]>; - + // // Register scaling // @@ -3962,7 +3962,7 @@ def int_aarch64_sve_extq : AdvSIMD_2VectorArgIndexed_Intrinsic; // // SVE2.1 - Move predicate to/from vector // -def int_aarch64_sve_pmov_to_pred_lane : SVE2_1VectorArgIndexed_Pred_Intrinsic; +def int_aarch64_sve_pmov_to_pred_lane : SVE2_1VectorArgIndexed_Pred_Intrinsic; def int_aarch64_sve_pmov_to_pred_lane_zero : SVE2_1VectorArg_Pred_Intrinsic; @@ -4004,10 +4004,10 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], 
[llvm_anyvector_ty, LLVMMatchType<0>], [IntrReadMem, IntrInaccessibleMemOnly]>; - + def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt; def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt; - + def int_aarch64_sve_fp8_cvtnt : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>], @@ -4019,32 +4019,32 @@ let TargetPrefix = "aarch64" in { [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty], [IntrReadMem, IntrInaccessibleMemOnly]>; - + class SVE2_FP8_FMLA_FDOT_Lane : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>; - + def int_aarch64_sve_fp8_fdot : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane; // Fused multiply-add def int_aarch64_sve_fp8_fmlalb : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalb_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlalt : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalt_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlallbb : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlallbb_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlallbt : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlallbt_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlalltb : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalltb_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlalltt : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalltt_lane : SVE2_FP8_FMLA_FDOT_Lane; diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index ded00b1..9e334d4 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2819,20 +2819,24 @@ class AMDGPULoadToLDS : "", [SDNPMemOperand]>; def int_amdgcn_load_to_lds : AMDGPULoadToLDS; -class AMDGPUGlobalLoadLDS : - ClangBuiltin<"__builtin_amdgcn_global_load_lds">, - Intrinsic < - [], - [LLVMQualPointerType<1>, // Base global pointer to load from - LLVMQualPointerType<3>, // LDS base pointer to store to - llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) - llvm_i32_ty, // imm offset (applied to both global and LDS address) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0, - // bit 1 = sc1, - // bit 4 = scc)) - [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, - ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree], - "", [SDNPMemOperand]>; +class AMDGPUGlobalLoadLDS + : ClangBuiltin<"__builtin_amdgcn_global_load_lds">, + Intrinsic< + [], + [LLVMQualPointerType<1>, // Base global pointer to load from + LLVMQualPointerType<3>, // LDS base pointer to store to + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) + llvm_i32_ty, // imm offset (applied to both global and LDS address) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0, + // bit 1 = sc1, + // bit 4 = scc, + // bit 31 = volatile + // (compiler + // implemented))) + [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, + ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, + IntrNoCallback, IntrNoFree], + "", [SDNPMemOperand]>; def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS; // This is IntrHasSideEffects because it reads from a volatile hardware register. 
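The AMDGPU change above reflows the AMDGPUGlobalLoadLDS definition and documents bit 31 of the auxiliary operand as a compiler-implemented volatile flag. A hedged sketch of how a frontend might emit the intrinsic with that bit set; the helper name and the chosen constants are assumptions, not part of the patch:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

// Hypothetical helper: emit a 4-byte global->LDS copy via
// llvm.amdgcn.global.load.lds with aux bit 31 ("volatile",
// compiler implemented per this patch) set.
static void emitVolatileGlobalLoadLDS(llvm::IRBuilder<> &B,
                                      llvm::Value *GlobalPtr,
                                      llvm::Value *LDSPtr) {
  llvm::Value *Args[] = {GlobalPtr, LDSPtr,
                         B.getInt32(4),          // data byte size
                         B.getInt32(0),          // imm offset
                         B.getInt32(1u << 31)};  // aux: bit 31 = volatile
  B.CreateIntrinsic(llvm::Intrinsic::amdgcn_global_load_lds,
                    /*Types=*/{}, Args);
}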
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h
index 307cc66..ada3523 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.h
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.h
@@ -31,7 +31,6 @@
 /// implementation, which includes a name and type signature.
 #define GET_RUNTIME_LIBCALL_ENUM
 #include "llvm/IR/RuntimeLibcalls.inc"
-#undef GET_RUNTIME_LIBCALL_ENUM
 
 namespace llvm {
 
@@ -170,7 +169,6 @@ public:
     // querying a real set of symbols
 #define GET_LOOKUP_LIBCALL_IMPL_NAME_BODY
 #include "llvm/IR/RuntimeLibcalls.inc"
-#undef GET_LOOKUP_LIBCALL_IMPL_NAME_BODY
   }
 
   /// Check if this is valid libcall for the current module, otherwise
diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index 41232335..9bbb8a2 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -316,11 +316,9 @@ inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx,
 /// Valid only for positive powers of two.
 template <size_t kValue> constexpr size_t ConstantLog2() {
   static_assert(llvm::isPowerOf2_64(kValue), "Value is not a valid power of 2");
-  return 1 + ConstantLog2<kValue / 2>();
+  return llvm::countr_zero_constexpr(kValue);
 }
 
-template <> constexpr size_t ConstantLog2<1>() { return 0; }
-
 template <size_t kValue>
 LLVM_DEPRECATED("Use ConstantLog2 instead", "ConstantLog2")
 constexpr size_t CTLog2() {
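The MathExtras.h change above reroutes ConstantLog2 through the new constexpr-friendly countr_zero_constexpr from bit.h instead of recursing. A minimal sketch of the resulting behavior; the static_asserts below are illustrative and not part of the patch:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"

// popcount and countr_zero_constexpr are now usable in constant expressions.
static_assert(llvm::countr_zero_constexpr(0x8u) == 3, "trailing zeros of 0b1000");
static_assert(llvm::countr_zero_constexpr(1u) == 0, "no trailing zeros");
// ConstantLog2 now delegates to countr_zero_constexpr.
static_assert(llvm::ConstantLog2<4096>() == 12, "log2 of a power of two");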