diff options
Diffstat (limited to 'llvm/lib')
47 files changed, 2055 insertions, 242 deletions
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 90bae77..6fb2807 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -59,6 +59,11 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(LazyValueInfoWrapperPass, "lazy-value-info", "Lazy Value Information Analysis", false, true) +static cl::opt<bool> PerPredRanges( + "lvi-per-pred-ranges", cl::Hidden, cl::init(false), + cl::desc("Enable tracking of ranges for a value in a block for" + "each block predecessor (default = false)")); + namespace llvm { FunctionPass *createLazyValueInfoPass() { return new LazyValueInfoWrapperPass(); @@ -103,6 +108,10 @@ namespace { namespace { using NonNullPointerSet = SmallDenseSet<AssertingVH<Value>, 2>; +using BBLatticeElementMap = + SmallDenseMap<PoisoningVH<BasicBlock>, ValueLatticeElement, 4>; +using PredecessorValueLatticeMap = + SmallDenseMap<AssertingVH<Value>, BBLatticeElementMap, 2>; /// This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. @@ -117,6 +126,10 @@ class LazyValueInfoCache { // std::nullopt indicates that the nonnull pointers for this basic block // block have not been computed yet. std::optional<NonNullPointerSet> NonNullPointers; + // This is an extension of the above LatticeElements, caching, for each + // Value, a ValueLatticeElement, for each predecessor of the BB tracked by + // this entry. + std::optional<PredecessorValueLatticeMap> PredecessorLatticeElements; }; /// Cached information per basic block. @@ -134,8 +147,14 @@ class LazyValueInfoCache { BlockCacheEntry *getOrCreateBlockEntry(BasicBlock *BB) { auto It = BlockCache.find_as(BB); - if (It == BlockCache.end()) - It = BlockCache.insert({BB, std::make_unique<BlockCacheEntry>()}).first; + if (It == BlockCache.end()) { + std::unique_ptr<BlockCacheEntry> BCE = + std::make_unique<BlockCacheEntry>(); + if (PerPredRanges) + BCE->PredecessorLatticeElements = + std::make_optional<PredecessorValueLatticeMap>(); + It = BlockCache.insert({BB, std::move(BCE)}).first; + } return It->second.get(); } @@ -161,6 +180,28 @@ public: addValueHandle(Val); } + void insertPredecessorResults(Value *Val, BasicBlock *BB, + BBLatticeElementMap &PredLatticeElements) { + BlockCacheEntry *Entry = getOrCreateBlockEntry(BB); + + Entry->PredecessorLatticeElements->insert({Val, PredLatticeElements}); + + addValueHandle(Val); + } + + std::optional<BBLatticeElementMap> + getCachedPredecessorInfo(Value *V, BasicBlock *BB) const { + const BlockCacheEntry *Entry = getBlockEntry(BB); + if (!Entry) + return std::nullopt; + + auto LatticeIt = Entry->PredecessorLatticeElements->find_as(V); + if (LatticeIt == Entry->PredecessorLatticeElements->end()) + return std::nullopt; + + return LatticeIt->second; + } + std::optional<ValueLatticeElement> getCachedValueInfo(Value *V, BasicBlock *BB) const { const BlockCacheEntry *Entry = getBlockEntry(BB); @@ -216,6 +257,8 @@ void LazyValueInfoCache::eraseValue(Value *V) { Pair.second->OverDefined.erase(V); if (Pair.second->NonNullPointers) Pair.second->NonNullPointers->erase(V); + if (PerPredRanges) + Pair.second->PredecessorLatticeElements->erase(V); } auto HandleIt = ValueHandles.find_as(V); @@ -230,6 +273,10 @@ void LVIValueHandle::deleted() { } void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { + // Clear all when a BB is removed. + if (PerPredRanges) + for (auto &Pair : BlockCache) + Pair.second->PredecessorLatticeElements->clear(); BlockCache.erase(BB); } @@ -691,6 +738,9 @@ LazyValueInfoImpl::solveBlockValueNonLocal(Value *Val, BasicBlock *BB) { // find a path to function entry. TODO: We should consider explicitly // canonicalizing to make this true rather than relying on this happy // accident. + std::optional<BBLatticeElementMap> PredLatticeElements; + if (PerPredRanges) + PredLatticeElements = std::make_optional<BBLatticeElementMap>(); for (BasicBlock *Pred : predecessors(BB)) { // Skip self loops. if (Pred == BB) @@ -710,8 +760,13 @@ LazyValueInfoImpl::solveBlockValueNonLocal(Value *Val, BasicBlock *BB) { << Pred->getName() << "' (non local).\n"); return Result; } + if (PerPredRanges) + PredLatticeElements->insert({Pred, *EdgeResult}); } + if (PerPredRanges) + TheCache.insertPredecessorResults(Val, BB, *PredLatticeElements); + // Return the merged value, which is more precise than 'overdefined'. assert(!Result.isOverdefined()); return Result; @@ -724,6 +779,9 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) { // Loop over all of our predecessors, merging what we know from them into // result. See the comment about the chosen traversal order in // solveBlockValueNonLocal; the same reasoning applies here. + std::optional<BBLatticeElementMap> PredLatticeElements; + if (PerPredRanges) + PredLatticeElements = std::make_optional<BBLatticeElementMap>(); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *PhiBB = PN->getIncomingBlock(i); Value *PhiVal = PN->getIncomingValue(i); @@ -746,8 +804,14 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) { return Result; } + + if (PerPredRanges) + PredLatticeElements->insert({PhiBB, *EdgeResult}); } + if (PerPredRanges) + TheCache.insertPredecessorResults(PN, BB, *PredLatticeElements); + // Return the merged value, which is more precise than 'overdefined'. assert(!Result.isOverdefined() && "Possible PHI in entry block?"); return Result; @@ -1002,7 +1066,77 @@ LazyValueInfoImpl::solveBlockValueBinaryOpImpl( const ConstantRange &LHSRange = *LHSRes; const ConstantRange &RHSRange = *RHSRes; - return ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange)); + + std::optional<ValueLatticeElement> MergedResult = + ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange)); + + if (!PerPredRanges) + return MergedResult; + + std::optional<BBLatticeElementMap> PredLHS = + TheCache.getCachedPredecessorInfo(LHS, BB); + if (!PredLHS) + return MergedResult; + std::optional<BBLatticeElementMap> PredRHS = + TheCache.getCachedPredecessorInfo(RHS, BB); + if (!PredRHS) + return MergedResult; + + const BBLatticeElementMap &LHSPredMap = *PredLHS; + const BBLatticeElementMap &RHSPredMap = *PredRHS; + + BBLatticeElementMap PredLatticeElements; + ValueLatticeElement OverallPredResult; + for (auto *Pred : predecessors(BB)) { + auto LHSIt = LHSPredMap.find_as(Pred); + if (LHSIt == LHSPredMap.end()) + return MergedResult; + const ValueLatticeElement &LHSFromPred = LHSIt->second; + std::optional<ConstantRange> LHSFromPredRes = + LHSFromPred.asConstantRange(LHS->getType()); + if (!LHSFromPredRes) + return MergedResult; + + auto RHSIt = RHSPredMap.find_as(Pred); + if (RHSIt == RHSPredMap.end()) + return MergedResult; + const ValueLatticeElement &RHSFromPred = RHSIt->second; + std::optional<ConstantRange> RHSFromPredRes = + RHSFromPred.asConstantRange(RHS->getType()); + if (!RHSFromPredRes) + return MergedResult; + + const ConstantRange &LHSFromPredRange = *LHSFromPredRes; + const ConstantRange &RHSFromPredRange = *RHSFromPredRes; + std::optional<ValueLatticeElement> PredResult = + ValueLatticeElement::getRange(OpFn(LHSFromPredRange, RHSFromPredRange)); + if (!PredResult) + return MergedResult; + if (PredResult->isOverdefined()) { + LLVM_DEBUG( + dbgs() << " pred BB '" << Pred->getName() << "' for BB '" + << BB->getName() + << "' overdefined. Discarding all predecessor intervals.\n"); + return MergedResult; + } + PredLatticeElements.insert({Pred, *PredResult}); + OverallPredResult.mergeIn(*PredResult); + } + + // If this point is reached, all predecessors for both LHS and RHS have + // constant ranges previously computed. Can cache result and use the + // OverallPredResult; + TheCache.insertPredecessorResults(I, BB, PredLatticeElements); + + LLVM_DEBUG(dbgs() << " Using predecessor intervals, evaluated " << *I + << " to: " << OverallPredResult << ".\n"); + + if (!MergedResult) + return OverallPredResult; + + LLVM_DEBUG(dbgs() << " Intersecting intervals for " << *I << ": " + << OverallPredResult << " and " << MergedResult << ".\n"); + return MergedResult->intersect(OverallPredResult); } std::optional<ValueLatticeElement> diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt index 6ed724b..7ae5f7e 100644 --- a/llvm/lib/CAS/CMakeLists.txt +++ b/llvm/lib/CAS/CMakeLists.txt @@ -2,14 +2,19 @@ add_llvm_component_library(LLVMCAS ActionCache.cpp ActionCaches.cpp BuiltinCAS.cpp + DatabaseFile.cpp InMemoryCAS.cpp MappedFileRegionArena.cpp ObjectStore.cpp OnDiskCommon.cpp + OnDiskTrieRawHashMap.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS + LINK_LIBS + ${LLVM_PTHREAD_LIB} + LINK_COMPONENTS Support ) diff --git a/llvm/lib/CAS/DatabaseFile.cpp b/llvm/lib/CAS/DatabaseFile.cpp new file mode 100644 index 0000000..db8ce1d --- /dev/null +++ b/llvm/lib/CAS/DatabaseFile.cpp @@ -0,0 +1,123 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This file implements the common abstractions for CAS database file. +/// +//===----------------------------------------------------------------------===// + +#include "DatabaseFile.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +Error ondisk::createTableConfigError(std::errc ErrC, StringRef Path, + StringRef TableName, const Twine &Msg) { + return createStringError(make_error_code(ErrC), + Path + "[" + TableName + "]: " + Msg); +} + +Error ondisk::checkTable(StringRef Label, size_t Expected, size_t Observed, + StringRef Path, StringRef TrieName) { + if (Expected == Observed) + return Error::success(); + return createTableConfigError(std::errc::invalid_argument, Path, TrieName, + "mismatched " + Label + + " (expected: " + Twine(Expected) + + ", observed: " + Twine(Observed) + ")"); +} + +Expected<DatabaseFile> +DatabaseFile::create(const Twine &Path, uint64_t Capacity, + function_ref<Error(DatabaseFile &)> NewDBConstructor) { + // Constructor for if the file doesn't exist. + auto NewFileConstructor = [&](MappedFileRegionArena &Alloc) -> Error { + if (Alloc.capacity() < + sizeof(Header) + sizeof(MappedFileRegionArena::Header)) + return createTableConfigError(std::errc::argument_out_of_domain, + Path.str(), "datafile", + "Allocator too small for header"); + (void)new (Alloc.data()) Header{getMagic(), getVersion(), {0}}; + DatabaseFile DB(Alloc); + return NewDBConstructor(DB); + }; + + // Get or create the file. + MappedFileRegionArena Alloc; + if (Error E = MappedFileRegionArena::create(Path, Capacity, sizeof(Header), + NewFileConstructor) + .moveInto(Alloc)) + return std::move(E); + + return DatabaseFile::get( + std::make_unique<MappedFileRegionArena>(std::move(Alloc))); +} + +Error DatabaseFile::addTable(TableHandle Table) { + assert(Table); + assert(&Table.getRegion() == &getRegion()); + int64_t ExistingRootOffset = 0; + const int64_t NewOffset = + reinterpret_cast<const char *>(&Table.getHeader()) - getRegion().data(); + if (H->RootTableOffset.compare_exchange_strong(ExistingRootOffset, NewOffset)) + return Error::success(); + + // Silently ignore attempts to set the root to itself. + if (ExistingRootOffset == NewOffset) + return Error::success(); + + // Return an proper error message. + TableHandle Root(getRegion(), ExistingRootOffset); + if (Root.getName() == Table.getName()) + return createStringError( + make_error_code(std::errc::not_supported), + "collision with existing table of the same name '" + Table.getName() + + "'"); + + return createStringError(make_error_code(std::errc::not_supported), + "cannot add new table '" + Table.getName() + + "'" + " to existing root '" + + Root.getName() + "'"); +} + +std::optional<TableHandle> DatabaseFile::findTable(StringRef Name) { + int64_t RootTableOffset = H->RootTableOffset.load(); + if (!RootTableOffset) + return std::nullopt; + + TableHandle Root(getRegion(), RootTableOffset); + if (Root.getName() == Name) + return Root; + + return std::nullopt; +} + +Error DatabaseFile::validate(MappedFileRegion &Region) { + if (Region.size() < sizeof(Header)) + return createStringError(std::errc::invalid_argument, + "database: missing header"); + + // Check the magic and version. + auto *H = reinterpret_cast<Header *>(Region.data()); + if (H->Magic != getMagic()) + return createStringError(std::errc::invalid_argument, + "database: bad magic"); + if (H->Version != getVersion()) + return createStringError(std::errc::invalid_argument, + "database: wrong version"); + + auto *MFH = reinterpret_cast<MappedFileRegionArena::Header *>(Region.data() + + sizeof(Header)); + // Check the bump-ptr, which should point past the header. + if (MFH->BumpPtr.load() < (int64_t)sizeof(Header)) + return createStringError(std::errc::invalid_argument, + "database: corrupt bump-ptr"); + + return Error::success(); +} diff --git a/llvm/lib/CAS/DatabaseFile.h b/llvm/lib/CAS/DatabaseFile.h new file mode 100644 index 0000000..609e5f13 --- /dev/null +++ b/llvm/lib/CAS/DatabaseFile.h @@ -0,0 +1,153 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares the common interface for a DatabaseFile that is used to +/// implement OnDiskCAS. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CAS_DATABASEFILE_H +#define LLVM_LIB_CAS_DATABASEFILE_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/CAS/MappedFileRegionArena.h" +#include "llvm/Support/Error.h" + +namespace llvm::cas::ondisk { + +using MappedFileRegion = MappedFileRegionArena::RegionT; + +/// Generic handle for a table. +/// +/// Generic table header layout: +/// - 2-bytes: TableKind +/// - 2-bytes: TableNameSize +/// - 4-bytes: TableNameRelOffset (relative to header) +class TableHandle { +public: + enum class TableKind : uint16_t { + TrieRawHashMap = 1, + DataAllocator = 2, + }; + struct Header { + TableKind Kind; + uint16_t NameSize; + int32_t NameRelOffset; ///< Relative to Header. + }; + + explicit operator bool() const { return H; } + const Header &getHeader() const { return *H; } + MappedFileRegion &getRegion() const { return *Region; } + + template <class T> static void check() { + static_assert( + std::is_same<decltype(T::Header::GenericHeader), Header>::value, + "T::GenericHeader should be of type TableHandle::Header"); + static_assert(offsetof(typename T::Header, GenericHeader) == 0, + "T::GenericHeader must be the head of T::Header"); + } + template <class T> bool is() const { return T::Kind == H->Kind; } + template <class T> T dyn_cast() const { + check<T>(); + if (is<T>()) + return T(*Region, *reinterpret_cast<typename T::Header *>(H)); + return T(); + } + template <class T> T cast() const { + assert(is<T>()); + return dyn_cast<T>(); + } + + StringRef getName() const { + auto *Begin = reinterpret_cast<const char *>(H) + H->NameRelOffset; + return StringRef(Begin, H->NameSize); + } + + TableHandle() = default; + TableHandle(MappedFileRegion &Region, Header &H) : Region(&Region), H(&H) {} + TableHandle(MappedFileRegion &Region, intptr_t HeaderOffset) + : TableHandle(Region, + *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) { + } + +private: + MappedFileRegion *Region = nullptr; + Header *H = nullptr; +}; + +/// Encapsulate a database file, which: +/// - Sets/checks magic. +/// - Sets/checks version. +/// - Points at an arbitrary root table. +/// - Sets up a MappedFileRegionArena for allocation. +/// +/// Top-level layout: +/// - 4-bytes: Magic +/// - 4-bytes: Version +/// - 8-bytes: RootTableOffset (16-bits: Kind; 48-bits: Offset) +/// - 8-bytes: BumpPtr from MappedFileRegionArena +class DatabaseFile { +public: + static constexpr uint32_t getMagic() { return 0xDA7ABA53UL; } + static constexpr uint32_t getVersion() { return 1UL; } + struct Header { + uint32_t Magic; + uint32_t Version; + std::atomic<int64_t> RootTableOffset; + }; + + const Header &getHeader() { return *H; } + MappedFileRegionArena &getAlloc() { return Alloc; } + MappedFileRegion &getRegion() { return Alloc.getRegion(); } + + /// Add a table. This is currently not thread safe and should be called inside + /// NewDBConstructor. + Error addTable(TableHandle Table); + + /// Find a table. May return null. + std::optional<TableHandle> findTable(StringRef Name); + + /// Create the DatabaseFile at Path with Capacity. + static Expected<DatabaseFile> + create(const Twine &Path, uint64_t Capacity, + function_ref<Error(DatabaseFile &)> NewDBConstructor); + + size_t size() const { return Alloc.size(); } + +private: + static Expected<DatabaseFile> + get(std::unique_ptr<MappedFileRegionArena> Alloc) { + if (Error E = validate(Alloc->getRegion())) + return std::move(E); + return DatabaseFile(std::move(Alloc)); + } + + static Error validate(MappedFileRegion &Region); + + DatabaseFile(MappedFileRegionArena &Alloc) + : H(reinterpret_cast<Header *>(Alloc.data())), Alloc(Alloc) {} + DatabaseFile(std::unique_ptr<MappedFileRegionArena> Alloc) + : DatabaseFile(*Alloc) { + OwnedAlloc = std::move(Alloc); + } + + Header *H = nullptr; + MappedFileRegionArena &Alloc; + std::unique_ptr<MappedFileRegionArena> OwnedAlloc; +}; + +Error createTableConfigError(std::errc ErrC, StringRef Path, + StringRef TableName, const Twine &Msg); + +Error checkTable(StringRef Label, size_t Expected, size_t Observed, + StringRef Path, StringRef TrieName); + +} // namespace llvm::cas::ondisk + +#endif diff --git a/llvm/lib/CAS/InMemoryCAS.cpp b/llvm/lib/CAS/InMemoryCAS.cpp index 255b89c..c63ee70d 100644 --- a/llvm/lib/CAS/InMemoryCAS.cpp +++ b/llvm/lib/CAS/InMemoryCAS.cpp @@ -57,6 +57,9 @@ public: InMemoryObject() = delete; InMemoryObject(InMemoryObject &&) = delete; InMemoryObject(const InMemoryObject &) = delete; + InMemoryObject &operator=(const InMemoryObject &) = delete; + InMemoryObject &operator=(InMemoryObject &&) = delete; + virtual ~InMemoryObject() = default; protected: InMemoryObject(Kind K, const InMemoryIndexValueT &I) : IndexAndKind(&I, K) {} diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp new file mode 100644 index 0000000..9b382dd7 --- /dev/null +++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp @@ -0,0 +1,1178 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file Implements OnDiskTrieRawHashMap. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskTrieRawHashMap.h" +#include "DatabaseFile.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/TrieHashIndexGenerator.h" +#include "llvm/CAS/MappedFileRegionArena.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/Threading.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +#if LLVM_ENABLE_ONDISK_CAS + +//===----------------------------------------------------------------------===// +// TrieRawHashMap data structures. +//===----------------------------------------------------------------------===// + +namespace { + +class SubtrieHandle; +class TrieRawHashMapHandle; +class TrieVisitor; + +/// A value stored in the slots inside a SubTrie. A stored value can either be a +/// subtrie (encoded after negation) which is the file offset to another +/// subtrie, or it can be a fileset to a DataRecord. +class SubtrieSlotValue { +public: + explicit operator bool() const { return !isEmpty(); } + bool isEmpty() const { return !Offset; } + bool isData() const { return Offset > 0; } + bool isSubtrie() const { return Offset < 0; } + uint64_t asData() const { + assert(isData()); + return Offset; + } + uint64_t asSubtrie() const { + assert(isSubtrie()); + return -Offset; + } + + FileOffset asSubtrieFileOffset() const { return FileOffset(asSubtrie()); } + + FileOffset asDataFileOffset() const { return FileOffset(asData()); } + + int64_t getRawOffset() const { return Offset; } + + static SubtrieSlotValue getDataOffset(int64_t Offset) { + return SubtrieSlotValue(Offset); + } + + static SubtrieSlotValue getSubtrieOffset(int64_t Offset) { + return SubtrieSlotValue(-Offset); + } + + static SubtrieSlotValue getDataOffset(FileOffset Offset) { + return getDataOffset(Offset.get()); + } + + static SubtrieSlotValue getSubtrieOffset(FileOffset Offset) { + return getDataOffset(Offset.get()); + } + + static SubtrieSlotValue getFromSlot(std::atomic<int64_t> &Slot) { + return SubtrieSlotValue(Slot.load()); + } + + SubtrieSlotValue() = default; + +private: + friend class SubtrieHandle; + explicit SubtrieSlotValue(int64_t Offset) : Offset(Offset) {} + int64_t Offset = 0; +}; + +/// Subtrie layout: +/// - 2-bytes: StartBit +/// - 1-bytes: NumBits=lg(num-slots) +/// - 5-bytes: 0-pad +/// - <slots> +class SubtrieHandle { +public: + struct Header { + /// The bit this subtrie starts on. + uint16_t StartBit; + + /// The number of bits this subtrie handles. It has 2^NumBits slots. + uint8_t NumBits; + + /// 0-pad to 8B. + uint8_t ZeroPad1B; + uint32_t ZeroPad4B; + }; + + /// Slot storage: + /// - zero: Empty + /// - positive: RecordOffset + /// - negative: SubtrieOffset + using SlotT = std::atomic<int64_t>; + + static int64_t getSlotsSize(uint32_t NumBits) { + return sizeof(int64_t) * (1u << NumBits); + } + + static int64_t getSize(uint32_t NumBits) { + return sizeof(SubtrieHandle::Header) + getSlotsSize(NumBits); + } + + int64_t getSize() const { return getSize(H->NumBits); } + size_t getNumSlots() const { return Slots.size(); } + + SubtrieSlotValue load(size_t I) const { + return SubtrieSlotValue(Slots[I].load()); + } + void store(size_t I, SubtrieSlotValue V) { + return Slots[I].store(V.getRawOffset()); + } + + void printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const; + + /// Return None on success, or the existing offset on failure. + bool compare_exchange_strong(size_t I, SubtrieSlotValue &Expected, + SubtrieSlotValue New) { + return Slots[I].compare_exchange_strong(Expected.Offset, New.Offset); + } + + /// Sink \p V from \p I in this subtrie down to \p NewI in a new subtrie with + /// \p NumSubtrieBits. + /// + /// \p UnusedSubtrie maintains a 1-item "free" list of unused subtries. If a + /// new subtrie is created that isn't used because of a lost race, then it If + /// it's already valid, it should be used instead of allocating a new one. + /// should be returned as an out parameter to be passed back in the future. + /// If it's already valid, it should be used instead of allocating a new one. + /// + /// Returns the subtrie that now lives at \p I. + Expected<SubtrieHandle> sink(size_t I, SubtrieSlotValue V, + MappedFileRegionArena &Alloc, + size_t NumSubtrieBits, + SubtrieHandle &UnusedSubtrie, size_t NewI); + + /// Only safe if the subtrie is empty. + void reinitialize(uint32_t StartBit, uint32_t NumBits); + + SubtrieSlotValue getOffset() const { + return SubtrieSlotValue::getSubtrieOffset( + reinterpret_cast<const char *>(H) - Region->data()); + } + + FileOffset getFileOffset() const { return getOffset().asSubtrieFileOffset(); } + + explicit operator bool() const { return H; } + + Header &getHeader() const { return *H; } + uint32_t getStartBit() const { return H->StartBit; } + uint32_t getNumBits() const { return H->NumBits; } + + static Expected<SubtrieHandle> create(MappedFileRegionArena &Alloc, + uint32_t StartBit, uint32_t NumBits); + + static SubtrieHandle getFromFileOffset(MappedFileRegion &Region, + FileOffset Offset) { + return SubtrieHandle(Region, SubtrieSlotValue::getSubtrieOffset(Offset)); + } + + SubtrieHandle() = default; + SubtrieHandle(MappedFileRegion &Region, Header &H) + : Region(&Region), H(&H), Slots(getSlots(H)) {} + SubtrieHandle(MappedFileRegion &Region, SubtrieSlotValue Offset) + : SubtrieHandle(Region, *reinterpret_cast<Header *>( + Region.data() + Offset.asSubtrie())) {} + +private: + MappedFileRegion *Region = nullptr; + Header *H = nullptr; + MutableArrayRef<SlotT> Slots; + + static MutableArrayRef<SlotT> getSlots(Header &H) { + return MutableArrayRef(reinterpret_cast<SlotT *>(&H + 1), 1u << H.NumBits); + } +}; + +/// Handle for a TrieRawHashMap table. +/// +/// TrieRawHashMap table layout: +/// - [8-bytes: Generic table header] +/// - 1-byte: NumSubtrieBits +/// - 1-byte: Flags (not used yet) +/// - 2-bytes: NumHashBits +/// - 4-bytes: RecordDataSize (in bytes) +/// - 8-bytes: RootTrieOffset +/// - 8-bytes: AllocatorOffset (reserved for implementing free lists) +/// - <name> '\0' +/// +/// Record layout: +/// - <hash> +/// - <data> +class TrieRawHashMapHandle { +public: + static constexpr TableHandle::TableKind Kind = + TableHandle::TableKind::TrieRawHashMap; + + struct Header { + TableHandle::Header GenericHeader; + uint8_t NumSubtrieBits; + uint8_t Flags; ///< None used yet. + uint16_t NumHashBits; + uint32_t RecordDataSize; + std::atomic<int64_t> RootTrieOffset; + std::atomic<int64_t> AllocatorOffset; + }; + + operator TableHandle() const { + if (!H) + return TableHandle(); + return TableHandle(*Region, H->GenericHeader); + } + + struct RecordData { + OnDiskTrieRawHashMap::ValueProxy Proxy; + SubtrieSlotValue Offset; + FileOffset getFileOffset() const { return Offset.asDataFileOffset(); } + }; + + enum Limits : size_t { + /// Seems like 65528 hash bits ought to be enough. + MaxNumHashBytes = UINT16_MAX >> 3, + MaxNumHashBits = MaxNumHashBytes << 3, + + /// 2^16 bits in a trie is 65536 slots. This restricts us to a 16-bit + /// index. This many slots is suspicously large anyway. + MaxNumRootBits = 16, + + /// 2^10 bits in a trie is 1024 slots. This many slots seems suspiciously + /// large for subtries. + MaxNumSubtrieBits = 10, + }; + + static constexpr size_t getNumHashBytes(size_t NumHashBits) { + assert(NumHashBits % 8 == 0); + return NumHashBits / 8; + } + static constexpr size_t getRecordSize(size_t RecordDataSize, + size_t NumHashBits) { + return RecordDataSize + getNumHashBytes(NumHashBits); + } + + RecordData getRecord(SubtrieSlotValue Offset); + Expected<RecordData> createRecord(MappedFileRegionArena &Alloc, + ArrayRef<uint8_t> Hash); + + explicit operator bool() const { return H; } + const Header &getHeader() const { return *H; } + SubtrieHandle getRoot() const; + Expected<SubtrieHandle> getOrCreateRoot(MappedFileRegionArena &Alloc); + MappedFileRegion &getRegion() const { return *Region; } + + size_t getFlags() const { return H->Flags; } + size_t getNumSubtrieBits() const { return H->NumSubtrieBits; } + size_t getNumHashBits() const { return H->NumHashBits; } + size_t getNumHashBytes() const { return getNumHashBytes(H->NumHashBits); } + size_t getRecordDataSize() const { return H->RecordDataSize; } + size_t getRecordSize() const { + return getRecordSize(H->RecordDataSize, H->NumHashBits); + } + + TrieHashIndexGenerator getIndexGen(SubtrieHandle Root, + ArrayRef<uint8_t> Hash) { + assert(Root.getStartBit() == 0); + assert(getNumHashBytes() == Hash.size()); + assert(getNumHashBits() == Hash.size() * 8); + return TrieHashIndexGenerator{Root.getNumBits(), getNumSubtrieBits(), Hash}; + } + + static Expected<TrieRawHashMapHandle> + create(MappedFileRegionArena &Alloc, StringRef Name, + std::optional<uint64_t> NumRootBits, uint64_t NumSubtrieBits, + uint64_t NumHashBits, uint64_t RecordDataSize); + + void + print(raw_ostream &OS, + function_ref<void(ArrayRef<char>)> PrintRecordData = nullptr) const; + + Error validate( + function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)> + RecordVerifier) const; + TrieRawHashMapHandle() = default; + TrieRawHashMapHandle(MappedFileRegion &Region, Header &H) + : Region(&Region), H(&H) {} + TrieRawHashMapHandle(MappedFileRegion &Region, intptr_t HeaderOffset) + : TrieRawHashMapHandle( + Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) { + } + +private: + MappedFileRegion *Region = nullptr; + Header *H = nullptr; +}; + +} // end anonymous namespace + +struct OnDiskTrieRawHashMap::ImplType { + DatabaseFile File; + TrieRawHashMapHandle Trie; +}; + +Expected<SubtrieHandle> SubtrieHandle::create(MappedFileRegionArena &Alloc, + uint32_t StartBit, + uint32_t NumBits) { + assert(StartBit <= TrieRawHashMapHandle::MaxNumHashBits); + assert(NumBits <= UINT8_MAX); + assert(NumBits <= TrieRawHashMapHandle::MaxNumRootBits); + + auto Mem = Alloc.allocate(getSize(NumBits)); + if (LLVM_UNLIKELY(!Mem)) + return Mem.takeError(); + auto *H = + new (*Mem) SubtrieHandle::Header{(uint16_t)StartBit, (uint8_t)NumBits, + /*ZeroPad1B=*/0, /*ZeroPad4B=*/0}; + SubtrieHandle S(Alloc.getRegion(), *H); + for (auto I = S.Slots.begin(), E = S.Slots.end(); I != E; ++I) + new (I) SlotT(0); + return S; +} + +SubtrieHandle TrieRawHashMapHandle::getRoot() const { + if (int64_t Root = H->RootTrieOffset) + return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Root)); + return SubtrieHandle(); +} + +Expected<SubtrieHandle> +TrieRawHashMapHandle::getOrCreateRoot(MappedFileRegionArena &Alloc) { + assert(&Alloc.getRegion() == &getRegion()); + if (SubtrieHandle Root = getRoot()) + return Root; + + int64_t Race = 0; + auto LazyRoot = SubtrieHandle::create(Alloc, 0, H->NumSubtrieBits); + if (LLVM_UNLIKELY(!LazyRoot)) + return LazyRoot.takeError(); + if (H->RootTrieOffset.compare_exchange_strong( + Race, LazyRoot->getOffset().asSubtrie())) + return *LazyRoot; + + // There was a race. Return the other root. + // + // TODO: Avoid leaking the lazy root by storing it in an allocator. + return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Race)); +} + +Expected<TrieRawHashMapHandle> +TrieRawHashMapHandle::create(MappedFileRegionArena &Alloc, StringRef Name, + std::optional<uint64_t> NumRootBits, + uint64_t NumSubtrieBits, uint64_t NumHashBits, + uint64_t RecordDataSize) { + // Allocate. + auto Offset = Alloc.allocateOffset(sizeof(Header) + Name.size() + 1); + if (LLVM_UNLIKELY(!Offset)) + return Offset.takeError(); + + // Construct the header and the name. + assert(Name.size() <= UINT16_MAX && "Expected smaller table name"); + assert(NumSubtrieBits <= UINT8_MAX && "Expected valid subtrie bits"); + assert(NumHashBits <= UINT16_MAX && "Expected valid hash size"); + assert(RecordDataSize <= UINT32_MAX && "Expected smaller table name"); + auto *H = new (Alloc.getRegion().data() + *Offset) + Header{{TableHandle::TableKind::TrieRawHashMap, (uint16_t)Name.size(), + (uint32_t)sizeof(Header)}, + (uint8_t)NumSubtrieBits, + /*Flags=*/0, + (uint16_t)NumHashBits, + (uint32_t)RecordDataSize, + /*RootTrieOffset=*/{0}, + /*AllocatorOffset=*/{0}}; + char *NameStorage = reinterpret_cast<char *>(H + 1); + llvm::copy(Name, NameStorage); + NameStorage[Name.size()] = 0; + + // Construct a root trie, if requested. + TrieRawHashMapHandle Trie(Alloc.getRegion(), *H); + auto Sub = SubtrieHandle::create(Alloc, 0, *NumRootBits); + if (LLVM_UNLIKELY(!Sub)) + return Sub.takeError(); + if (NumRootBits) + H->RootTrieOffset = Sub->getOffset().asSubtrie(); + return Trie; +} + +TrieRawHashMapHandle::RecordData +TrieRawHashMapHandle::getRecord(SubtrieSlotValue Offset) { + char *Begin = Region->data() + Offset.asData(); + OnDiskTrieRawHashMap::ValueProxy Proxy; + Proxy.Data = MutableArrayRef(Begin, getRecordDataSize()); + Proxy.Hash = ArrayRef(reinterpret_cast<const uint8_t *>(Proxy.Data.end()), + getNumHashBytes()); + return RecordData{Proxy, Offset}; +} + +Expected<TrieRawHashMapHandle::RecordData> +TrieRawHashMapHandle::createRecord(MappedFileRegionArena &Alloc, + ArrayRef<uint8_t> Hash) { + assert(&Alloc.getRegion() == Region); + assert(Hash.size() == getNumHashBytes()); + auto Offset = Alloc.allocateOffset(getRecordSize()); + if (LLVM_UNLIKELY(!Offset)) + return Offset.takeError(); + + RecordData Record = getRecord(SubtrieSlotValue::getDataOffset(*Offset)); + llvm::copy(Hash, const_cast<uint8_t *>(Record.Proxy.Hash.begin())); + return Record; +} + +Expected<OnDiskTrieRawHashMap::const_pointer> +OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { + // Check alignment. + if (!isAligned(MappedFileRegionArena::getAlign(), Offset.get())) + return createStringError(make_error_code(std::errc::protocol_error), + "unaligned file offset at 0x" + + utohexstr(Offset.get(), /*LowerCase=*/true)); + + // Check bounds. + // + // Note: There's no potential overflow when using \c uint64_t because Offset + // is in valid offset range and the record size is in \c [0,UINT32_MAX]. + if (!validOffset(Offset) || + Offset.get() + Impl->Trie.getRecordSize() > Impl->File.getAlloc().size()) + return createStringError(make_error_code(std::errc::protocol_error), + "file offset too large: 0x" + + utohexstr(Offset.get(), /*LowerCase=*/true)); + + // Looks okay... + TrieRawHashMapHandle::RecordData D = + Impl->Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset)); + return const_pointer(D.Proxy, D.getFileOffset()); +} + +OnDiskTrieRawHashMap::const_pointer +OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const { + TrieRawHashMapHandle Trie = Impl->Trie; + assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash"); + + SubtrieHandle S = Trie.getRoot(); + if (!S) + return const_pointer(); + + TrieHashIndexGenerator IndexGen = Trie.getIndexGen(S, Hash); + size_t Index = IndexGen.next(); + for (;;) { + // Try to set the content. + SubtrieSlotValue V = S.load(Index); + if (!V) + return const_pointer(); + + // Check for an exact match. + if (V.isData()) { + TrieRawHashMapHandle::RecordData D = Trie.getRecord(V); + return D.Proxy.Hash == Hash ? const_pointer(D.Proxy, D.getFileOffset()) + : const_pointer(); + } + + Index = IndexGen.next(); + S = SubtrieHandle(Trie.getRegion(), V); + } +} + +/// Only safe if the subtrie is empty. +void SubtrieHandle::reinitialize(uint32_t StartBit, uint32_t NumBits) { + assert(StartBit > H->StartBit); + assert(NumBits <= H->NumBits); + // Ideally would also assert that all slots are empty, but that's expensive. + + H->StartBit = StartBit; + H->NumBits = NumBits; +} + +Expected<OnDiskTrieRawHashMap::pointer> +OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, + LazyInsertOnConstructCB OnConstruct, + LazyInsertOnLeakCB OnLeak) { + TrieRawHashMapHandle Trie = Impl->Trie; + assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash"); + + MappedFileRegionArena &Alloc = Impl->File.getAlloc(); + std::optional<SubtrieHandle> S; + auto Err = Trie.getOrCreateRoot(Alloc).moveInto(S); + if (LLVM_UNLIKELY(Err)) + return std::move(Err); + + TrieHashIndexGenerator IndexGen = Trie.getIndexGen(*S, Hash); + size_t Index = IndexGen.next(); + + // Walk through the hash bytes and insert into correct trie position. + std::optional<TrieRawHashMapHandle::RecordData> NewRecord; + SubtrieHandle UnusedSubtrie; + for (;;) { + SubtrieSlotValue Existing = S->load(Index); + + // Try to set it, if it's empty. + if (!Existing) { + if (!NewRecord) { + auto Err = Trie.createRecord(Alloc, Hash).moveInto(NewRecord); + if (LLVM_UNLIKELY(Err)) + return std::move(Err); + if (OnConstruct) + OnConstruct(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy); + } + + if (S->compare_exchange_strong(Index, Existing, NewRecord->Offset)) + return pointer(NewRecord->Proxy, NewRecord->Offset.asDataFileOffset()); + + // Race means that Existing is no longer empty; fall through... + } + + if (Existing.isSubtrie()) { + S = SubtrieHandle(Trie.getRegion(), Existing); + Index = IndexGen.next(); + continue; + } + + // Check for an exact match. + TrieRawHashMapHandle::RecordData ExistingRecord = Trie.getRecord(Existing); + if (ExistingRecord.Proxy.Hash == Hash) { + if (NewRecord && OnLeak) + OnLeak(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy, + ExistingRecord.Offset.asDataFileOffset(), ExistingRecord.Proxy); + return pointer(ExistingRecord.Proxy, + ExistingRecord.Offset.asDataFileOffset()); + } + + // Sink the existing content as long as the indexes match. + for (;;) { + size_t NextIndex = IndexGen.next(); + size_t NewIndexForExistingContent = + IndexGen.getCollidingBits(ExistingRecord.Proxy.Hash); + + auto Err = S->sink(Index, Existing, Alloc, IndexGen.getNumBits(), + UnusedSubtrie, NewIndexForExistingContent) + .moveInto(S); + if (LLVM_UNLIKELY(Err)) + return std::move(Err); + Index = NextIndex; + + // Found the difference. + if (NextIndex != NewIndexForExistingContent) + break; + } + } +} + +Expected<SubtrieHandle> SubtrieHandle::sink(size_t I, SubtrieSlotValue V, + MappedFileRegionArena &Alloc, + size_t NumSubtrieBits, + SubtrieHandle &UnusedSubtrie, + size_t NewI) { + std::optional<SubtrieHandle> NewS; + if (UnusedSubtrie) { + // Steal UnusedSubtrie and initialize it. + NewS.emplace(); + std::swap(*NewS, UnusedSubtrie); + NewS->reinitialize(getStartBit() + getNumBits(), NumSubtrieBits); + } else { + // Allocate a new, empty subtrie. + auto Err = SubtrieHandle::create(Alloc, getStartBit() + getNumBits(), + NumSubtrieBits) + .moveInto(NewS); + if (LLVM_UNLIKELY(Err)) + return std::move(Err); + } + + NewS->store(NewI, V); + if (compare_exchange_strong(I, V, NewS->getOffset())) + return *NewS; // Success! + + // Raced. + assert(V.isSubtrie() && "Expected racing sink() to add a subtrie"); + + // Wipe out the new slot so NewS can be reused and set the out parameter. + NewS->store(NewI, SubtrieSlotValue()); + UnusedSubtrie = *NewS; + + // Return the subtrie added by the concurrent sink() call. + return SubtrieHandle(Alloc.getRegion(), V); +} + +void OnDiskTrieRawHashMap::print( + raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const { + Impl->Trie.print(OS, PrintRecordData); +} + +Error OnDiskTrieRawHashMap::validate( + function_ref<Error(FileOffset, ConstValueProxy)> RecordVerifier) const { + return Impl->Trie.validate(RecordVerifier); +} + +// Helper function that prints hexdigit and have a sub-byte starting position. +static void printHexDigits(raw_ostream &OS, ArrayRef<uint8_t> Bytes, + size_t StartBit, size_t NumBits) { + assert(StartBit % 4 == 0); + assert(NumBits % 4 == 0); + for (size_t I = StartBit, E = StartBit + NumBits; I != E; I += 4) { + uint8_t HexPair = Bytes[I / 8]; + uint8_t HexDigit = I % 8 == 0 ? HexPair >> 4 : HexPair & 0xf; + OS << hexdigit(HexDigit, /*LowerCase=*/true); + } +} + +static void printBits(raw_ostream &OS, ArrayRef<uint8_t> Bytes, size_t StartBit, + size_t NumBits) { + assert(StartBit + NumBits <= Bytes.size() * 8u); + for (size_t I = StartBit, E = StartBit + NumBits; I != E; ++I) { + uint8_t Byte = Bytes[I / 8]; + size_t ByteOffset = I % 8; + if (size_t ByteShift = 8 - ByteOffset - 1) + Byte >>= ByteShift; + OS << (Byte & 0x1 ? '1' : '0'); + } +} + +void SubtrieHandle::printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const { + // afb[1c:00*01110*0]def + size_t EndBit = getStartBit() + getNumBits(); + size_t HashEndBit = Bytes.size() * 8u; + + size_t FirstBinaryBit = getStartBit() & ~0x3u; + printHexDigits(OS, Bytes, 0, FirstBinaryBit); + + size_t LastBinaryBit = (EndBit + 3u) & ~0x3u; + OS << "["; + printBits(OS, Bytes, FirstBinaryBit, LastBinaryBit - FirstBinaryBit); + OS << "]"; + + printHexDigits(OS, Bytes, LastBinaryBit, HashEndBit - LastBinaryBit); +} + +static void appendIndexBits(std::string &Prefix, size_t Index, + size_t NumSlots) { + std::string Bits; + for (size_t NumBits = 1u; NumBits < NumSlots; NumBits <<= 1) { + Bits.push_back('0' + (Index & 0x1)); + Index >>= 1; + } + for (char Ch : llvm::reverse(Bits)) + Prefix += Ch; +} + +static void printPrefix(raw_ostream &OS, StringRef Prefix) { + while (Prefix.size() >= 4) { + uint8_t Digit; + bool ErrorParsingBinary = Prefix.take_front(4).getAsInteger(2, Digit); + assert(!ErrorParsingBinary); + (void)ErrorParsingBinary; + OS << hexdigit(Digit, /*LowerCase=*/true); + Prefix = Prefix.drop_front(4); + } + if (!Prefix.empty()) + OS << "[" << Prefix << "]"; +} + +LLVM_DUMP_METHOD void OnDiskTrieRawHashMap::dump() const { print(dbgs()); } + +static Expected<size_t> checkParameter(StringRef Label, size_t Max, + std::optional<size_t> Value, + std::optional<size_t> Default, + StringRef Path, StringRef TableName) { + assert(Value || Default); + assert(!Default || *Default <= Max); + if (!Value) + return *Default; + + if (*Value <= Max) + return *Value; + return createTableConfigError( + std::errc::argument_out_of_domain, Path, TableName, + "invalid " + Label + ": " + Twine(*Value) + " (max: " + Twine(Max) + ")"); +} + +size_t OnDiskTrieRawHashMap::size() const { return Impl->File.size(); } +size_t OnDiskTrieRawHashMap::capacity() const { + return Impl->File.getRegion().size(); +} + +Expected<OnDiskTrieRawHashMap> +OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine, + size_t NumHashBits, uint64_t DataSize, + uint64_t MaxFileSize, + std::optional<uint64_t> NewFileInitialSize, + std::optional<size_t> NewTableNumRootBits, + std::optional<size_t> NewTableNumSubtrieBits) { + SmallString<128> PathStorage; + StringRef Path = PathTwine.toStringRef(PathStorage); + SmallString<128> TrieNameStorage; + StringRef TrieName = TrieNameTwine.toStringRef(TrieNameStorage); + + constexpr size_t DefaultNumRootBits = 10; + constexpr size_t DefaultNumSubtrieBits = 6; + + size_t NumRootBits; + if (Error E = checkParameter( + "root bits", TrieRawHashMapHandle::MaxNumRootBits, + NewTableNumRootBits, DefaultNumRootBits, Path, TrieName) + .moveInto(NumRootBits)) + return std::move(E); + + size_t NumSubtrieBits; + if (Error E = checkParameter("subtrie bits", + TrieRawHashMapHandle::MaxNumSubtrieBits, + NewTableNumSubtrieBits, DefaultNumSubtrieBits, + Path, TrieName) + .moveInto(NumSubtrieBits)) + return std::move(E); + + size_t NumHashBytes = NumHashBits >> 3; + if (Error E = + checkParameter("hash size", TrieRawHashMapHandle::MaxNumHashBits, + NumHashBits, std::nullopt, Path, TrieName) + .takeError()) + return std::move(E); + assert(NumHashBits == NumHashBytes << 3 && + "Expected hash size to be byte-aligned"); + if (NumHashBits != NumHashBytes << 3) + return createTableConfigError( + std::errc::argument_out_of_domain, Path, TrieName, + "invalid hash size: " + Twine(NumHashBits) + " (not byte-aligned)"); + + // Constructor for if the file doesn't exist. + auto NewDBConstructor = [&](DatabaseFile &DB) -> Error { + auto Trie = + TrieRawHashMapHandle::create(DB.getAlloc(), TrieName, NumRootBits, + NumSubtrieBits, NumHashBits, DataSize); + if (LLVM_UNLIKELY(!Trie)) + return Trie.takeError(); + + return DB.addTable(*Trie); + }; + + // Get or create the file. + Expected<DatabaseFile> File = + DatabaseFile::create(Path, MaxFileSize, NewDBConstructor); + if (!File) + return File.takeError(); + + // Find the trie and validate it. + std::optional<TableHandle> Table = File->findTable(TrieName); + if (!Table) + return createTableConfigError(std::errc::argument_out_of_domain, Path, + TrieName, "table not found"); + if (Error E = checkTable("table kind", (size_t)TrieRawHashMapHandle::Kind, + (size_t)Table->getHeader().Kind, Path, TrieName)) + return std::move(E); + auto Trie = Table->cast<TrieRawHashMapHandle>(); + assert(Trie && "Already checked the kind"); + + // Check the hash and data size. + if (Error E = checkTable("hash size", NumHashBits, Trie.getNumHashBits(), + Path, TrieName)) + return std::move(E); + if (Error E = checkTable("data size", DataSize, Trie.getRecordDataSize(), + Path, TrieName)) + return std::move(E); + + // No flags supported right now. Either corrupt, or coming from a future + // writer. + if (size_t Flags = Trie.getFlags()) + return createTableConfigError(std::errc::invalid_argument, Path, TrieName, + "unsupported flags: " + Twine(Flags)); + + // Success. + OnDiskTrieRawHashMap::ImplType Impl{DatabaseFile(std::move(*File)), Trie}; + return OnDiskTrieRawHashMap(std::make_unique<ImplType>(std::move(Impl))); +} + +static Error createInvalidTrieError(uint64_t Offset, const Twine &Msg) { + return createStringError(make_error_code(std::errc::protocol_error), + "invalid trie at 0x" + + utohexstr(Offset, /*LowerCase=*/true) + ": " + + Msg); +} + +//===----------------------------------------------------------------------===// +// TrieVisitor data structures. +//===----------------------------------------------------------------------===// + +namespace { +/// A multi-threaded vistior to traverse the Trie. +/// +/// TODO: add more sanity checks that isn't just plain data corruption. For +/// example, some ill-formed data can be constructed to form a cycle using +/// Sub-Tries and it can lead to inifinite loop when visiting (or inserting +/// data). +class TrieVisitor { +public: + TrieVisitor(TrieRawHashMapHandle Trie, unsigned ThreadCount = 0, + unsigned ErrorLimit = 50) + : Trie(Trie), ErrorLimit(ErrorLimit), + Threads(hardware_concurrency(ThreadCount)) {} + virtual ~TrieVisitor() = default; + Error visit(); + +private: + // Virtual method to implement the action when visiting a sub-trie. + virtual Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) { + return Error::success(); + } + + // Virtual method to implement the action when visiting a slot in a trie node. + virtual Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix, + SubtrieSlotValue Slot) { + return Error::success(); + } + +protected: + TrieRawHashMapHandle Trie; + +private: + Error traverseTrieNode(SubtrieHandle Node, StringRef Prefix); + + Error validateSubTrie(SubtrieHandle Node, bool IsRoot); + + // Helper function to capture errors when visiting the trie nodes. + void addError(Error NewError) { + assert(NewError && "not an error"); + std::lock_guard<std::mutex> ErrorLock(Lock); + if (NumError >= ErrorLimit) { + // Too many errors. + consumeError(std::move(NewError)); + return; + } + + if (Err) + Err = joinErrors(std::move(*Err), std::move(NewError)); + else + Err = std::move(NewError); + NumError++; + } + + bool tooManyErrors() { + std::lock_guard<std::mutex> ErrorLock(Lock); + return (bool)Err && NumError >= ErrorLimit; + } + + const unsigned ErrorLimit; + std::optional<Error> Err; + unsigned NumError = 0; + std::mutex Lock; + DefaultThreadPool Threads; +}; + +/// A visitor that traverse and print the Trie. +class TriePrinter : public TrieVisitor { +public: + TriePrinter(TrieRawHashMapHandle Trie, raw_ostream &OS, + function_ref<void(ArrayRef<char>)> PrintRecordData) + : TrieVisitor(Trie, /*ThreadCount=*/1), OS(OS), + PrintRecordData(PrintRecordData) {} + + Error printRecords() { + if (Records.empty()) + return Error::success(); + + OS << "records\n"; + llvm::sort(Records); + for (int64_t Offset : Records) { + TrieRawHashMapHandle::RecordData Record = + Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset)); + if (auto Err = printRecord(Record)) + return Err; + } + return Error::success(); + } + + Error printRecord(TrieRawHashMapHandle::RecordData &Record) { + OS << "- addr=" << (void *)Record.getFileOffset().get() << " "; + if (PrintRecordData) { + PrintRecordData(Record.Proxy.Data); + } else { + OS << "bytes="; + ArrayRef<uint8_t> Data( + reinterpret_cast<const uint8_t *>(Record.Proxy.Data.data()), + Record.Proxy.Data.size()); + printHexDigits(OS, Data, 0, Data.size() * 8); + } + OS << "\n"; + return Error::success(); + } + + Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) override { + if (Prefix.empty()) { + OS << "root"; + } else { + OS << "subtrie="; + printPrefix(OS, Prefix); + } + + OS << " addr=" + << (void *)(reinterpret_cast<const char *>(&SubTrie.getHeader()) - + Trie.getRegion().data()); + OS << " num-slots=" << SubTrie.getNumSlots() << "\n"; + return Error::success(); + } + + Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix, + SubtrieSlotValue Slot) override { + OS << "- index="; + for (size_t Pad : {10, 100, 1000}) + if (I < Pad && Subtrie.getNumSlots() >= Pad) + OS << "0"; + OS << I << " "; + if (Slot.isSubtrie()) { + OS << "addr=" << (void *)Slot.asSubtrie(); + OS << " subtrie="; + printPrefix(OS, Prefix); + OS << "\n"; + return Error::success(); + } + TrieRawHashMapHandle::RecordData Record = Trie.getRecord(Slot); + OS << "addr=" << (void *)Record.getFileOffset().get(); + OS << " content="; + Subtrie.printHash(OS, Record.Proxy.Hash); + OS << "\n"; + Records.push_back(Slot.asData()); + return Error::success(); + } + +private: + raw_ostream &OS; + function_ref<void(ArrayRef<char>)> PrintRecordData; + SmallVector<int64_t> Records; +}; + +/// TrieVerifier that adds additional verification on top of the basic visitor. +class TrieVerifier : public TrieVisitor { +public: + TrieVerifier( + TrieRawHashMapHandle Trie, + function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)> + RecordVerifier) + : TrieVisitor(Trie), RecordVerifier(RecordVerifier) {} + +private: + Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) final { + return Error::success(); + } + + Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix, + SubtrieSlotValue Slot) final { + if (RecordVerifier && Slot.isData()) { + if (!isAligned(MappedFileRegionArena::getAlign(), Slot.asData())) + return createInvalidTrieError(Slot.asData(), "mis-aligned data entry"); + + TrieRawHashMapHandle::RecordData Record = + Trie.getRecord(SubtrieSlotValue::getDataOffset(Slot.asData())); + return RecordVerifier(Slot.asDataFileOffset(), + OnDiskTrieRawHashMap::ConstValueProxy{ + Record.Proxy.Hash, Record.Proxy.Data}); + } + return Error::success(); + } + + function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)> + RecordVerifier; +}; +} // namespace + +Error TrieVisitor::visit() { + auto Root = Trie.getRoot(); + if (!Root) + return Error::success(); + + if (auto Err = validateSubTrie(Root, /*IsRoot=*/true)) + return Err; + + if (auto Err = visitSubTrie("", Root)) + return Err; + + SmallVector<SubtrieHandle> Subs; + SmallVector<std::string> Prefixes; + const size_t NumSlots = Root.getNumSlots(); + for (size_t I = 0, E = NumSlots; I != E; ++I) { + SubtrieSlotValue Slot = Root.load(I); + if (!Slot) + continue; + uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData(); + if (Offset >= (uint64_t)Trie.getRegion().size()) + return createInvalidTrieError(Offset, "slot points out of bound"); + std::string SubtriePrefix; + appendIndexBits(SubtriePrefix, I, NumSlots); + if (Slot.isSubtrie()) { + SubtrieHandle S(Trie.getRegion(), Slot); + Subs.push_back(S); + Prefixes.push_back(SubtriePrefix); + } + if (auto Err = visitSlot(I, Root, SubtriePrefix, Slot)) + return Err; + } + + for (size_t I = 0, E = Subs.size(); I != E; ++I) { + Threads.async( + [&](unsigned Idx) { + // Don't run if there is an error already. + if (tooManyErrors()) + return; + if (auto Err = traverseTrieNode(Subs[Idx], Prefixes[Idx])) + addError(std::move(Err)); + }, + I); + } + + Threads.wait(); + if (Err) + return std::move(*Err); + return Error::success(); +} + +Error TrieVisitor::validateSubTrie(SubtrieHandle Node, bool IsRoot) { + char *Addr = reinterpret_cast<char *>(&Node.getHeader()); + const int64_t Offset = Node.getFileOffset().get(); + if (Addr + Node.getSize() >= + Trie.getRegion().data() + Trie.getRegion().size()) + return createInvalidTrieError(Offset, "subtrie node spans out of bound"); + + if (!IsRoot && + Node.getStartBit() + Node.getNumBits() > Trie.getNumHashBits()) { + return createInvalidTrieError(Offset, + "subtrie represents too many hash bits"); + } + + if (IsRoot) { + if (Node.getStartBit() != 0) + return createInvalidTrieError(Offset, + "root node doesn't start at 0 index"); + + return Error::success(); + } + + if (Node.getNumBits() > Trie.getNumSubtrieBits()) + return createInvalidTrieError(Offset, "subtrie has wrong number of slots"); + + return Error::success(); +} + +Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) { + if (auto Err = validateSubTrie(Node, /*IsRoot=*/false)) + return Err; + + if (auto Err = visitSubTrie(Prefix, Node)) + return Err; + + SmallVector<SubtrieHandle> Subs; + SmallVector<std::string> Prefixes; + const size_t NumSlots = Node.getNumSlots(); + for (size_t I = 0, E = NumSlots; I != E; ++I) { + SubtrieSlotValue Slot = Node.load(I); + if (!Slot) + continue; + uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData(); + if (Offset >= (uint64_t)Trie.getRegion().size()) + return createInvalidTrieError(Offset, "slot points out of bound"); + std::string SubtriePrefix = Prefix.str(); + appendIndexBits(SubtriePrefix, I, NumSlots); + if (Slot.isSubtrie()) { + SubtrieHandle S(Trie.getRegion(), Slot); + Subs.push_back(S); + Prefixes.push_back(SubtriePrefix); + } + if (auto Err = visitSlot(I, Node, SubtriePrefix, Slot)) + return Err; + } + for (size_t I = 0, E = Subs.size(); I != E; ++I) + if (auto Err = traverseTrieNode(Subs[I], Prefixes[I])) + return Err; + + return Error::success(); +} + +void TrieRawHashMapHandle::print( + raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const { + OS << "hash-num-bits=" << getNumHashBits() + << " hash-size=" << getNumHashBytes() + << " record-data-size=" << getRecordDataSize() << "\n"; + + TriePrinter Printer(*this, OS, PrintRecordData); + if (auto Err = Printer.visit()) + OS << "error: " << toString(std::move(Err)) << "\n"; + + if (auto Err = Printer.printRecords()) + OS << "error: " << toString(std::move(Err)) << "\n"; + + return; +} + +Error TrieRawHashMapHandle::validate( + function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)> + RecordVerifier) const { + // Use the base TrieVisitor to identify the errors inside trie first. + TrieVisitor BasicVerifier(*this); + if (auto Err = BasicVerifier.visit()) + return Err; + + // If the trie data structure is sound, do a second pass to verify data and + // verifier function can assume the index is correct. However, there can be + // newly added bad entries that can still produce error. + TrieVerifier Verifier(*this, RecordVerifier); + return Verifier.visit(); +} + +#else // !LLVM_ENABLE_ONDISK_CAS + +struct OnDiskTrieRawHashMap::ImplType {}; + +Expected<OnDiskTrieRawHashMap> +OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine, + size_t NumHashBits, uint64_t DataSize, + uint64_t MaxFileSize, + std::optional<uint64_t> NewFileInitialSize, + std::optional<size_t> NewTableNumRootBits, + std::optional<size_t> NewTableNumSubtrieBits) { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskTrieRawHashMap is not supported"); +} + +Expected<OnDiskTrieRawHashMap::pointer> +OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, + LazyInsertOnConstructCB OnConstruct, + LazyInsertOnLeakCB OnLeak) { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskTrieRawHashMap is not supported"); +} + +Expected<OnDiskTrieRawHashMap::const_pointer> +OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskTrieRawHashMap is not supported"); +} + +OnDiskTrieRawHashMap::const_pointer +OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const { + return const_pointer(); +} + +void OnDiskTrieRawHashMap::print( + raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const { +} + +Error OnDiskTrieRawHashMap::validate( + function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)> + RecordVerifier) const { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskTrieRawHashMap is not supported"); +} + +size_t OnDiskTrieRawHashMap::size() const { return 0; } +size_t OnDiskTrieRawHashMap::capacity() const { return 0; } + +#endif // LLVM_ENABLE_ONDISK_CAS + +OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(std::unique_ptr<ImplType> Impl) + : Impl(std::move(Impl)) {} +OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(OnDiskTrieRawHashMap &&RHS) = + default; +OnDiskTrieRawHashMap & +OnDiskTrieRawHashMap::operator=(OnDiskTrieRawHashMap &&RHS) = default; +OnDiskTrieRawHashMap::~OnDiskTrieRawHashMap() = default; diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 0f3ff98..d98d180 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -105,6 +105,8 @@ DebugHandlerBase::~DebugHandlerBase() = default; void DebugHandlerBase::beginModule(Module *M) { if (M->debug_compile_units().empty()) Asm = nullptr; + else + LScopes.initialize(*M); } // Each LexicalScope has first instruction and last instruction to mark @@ -269,7 +271,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { // Grab the lexical scopes for the function, if we don't have any of those // then we're not going to be able to do anything. - LScopes.initialize(*MF); + LScopes.scanFunction(*MF); if (LScopes.empty()) { beginFunctionImpl(MF); return; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 67f526f..518121e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -537,8 +537,9 @@ void DwarfCompileUnit::addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName, // and DW_AT_high_pc attributes. If there are global variables in this // scope then create and insert DIEs for these variables. DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP, + const Function &F, MCSymbol *LineTableSym) { - DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes()); + DIE *SPDie = getOrCreateSubprogramDIE(SP, &F, includeMinimalInlineScopes()); SmallVector<RangeSpan, 2> BB_List; // If basic block sections are on, ranges for each basic block section has // to be emitted separately. @@ -1122,9 +1123,10 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { } DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, + const Function &F, LexicalScope *Scope, MCSymbol *LineTableSym) { - DIE &ScopeDIE = updateSubprogramScopeDIE(Sub, LineTableSym); + DIE &ScopeDIE = updateSubprogramScopeDIE(Sub, F, LineTableSym); if (Scope) { assert(!Scope->getInlinedAt()); @@ -1198,32 +1200,17 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, return ObjectPointer; } -void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( - LexicalScope *Scope) { - auto *SP = cast<DISubprogram>(Scope->getScopeNode()); - if (getAbstractScopeDIEs().count(SP)) - return; +DIE &DwarfCompileUnit::getOrCreateAbstractSubprogramDIE( + const DISubprogram *SP) { + if (auto *AbsDef = getAbstractScopeDIEs().lookup(SP)) + return *AbsDef; - DIE *ContextDIE; - DwarfCompileUnit *ContextCU = this; - - if (includeMinimalInlineScopes()) - ContextDIE = &getUnitDie(); - // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with - // the important distinction that the debug node is not associated with the - // DIE (since the debug node will be associated with the concrete DIE, if - // any). It could be refactored to some common utility function. - else if (auto *SPDecl = SP->getDeclaration()) { - ContextDIE = &getUnitDie(); - getOrCreateSubprogramDIE(SPDecl); - } else { - ContextDIE = getOrCreateContextDIE(SP->getScope()); - // The scope may be shared with a subprogram that has already been - // constructed in another CU, in which case we need to construct this - // subprogram in the same CU. - ContextCU = DD->lookupCU(ContextDIE->getUnitDie()); - } + auto [ContextDIE, ContextCU] = getOrCreateAbstractSubprogramContextDIE(SP); + return createAbstractSubprogramDIE(SP, ContextDIE, ContextCU); +} +DIE &DwarfCompileUnit::createAbstractSubprogramDIE( + const DISubprogram *SP, DIE *ContextDIE, DwarfCompileUnit *ContextCU) { // Passing null as the associated node because the abstract definition // shouldn't be found by lookup. DIE &AbsDef = ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, @@ -1237,8 +1224,45 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( DD->getDwarfVersion() <= 4 ? std::optional<dwarf::Form>() : dwarf::DW_FORM_implicit_const, dwarf::DW_INL_inlined); - if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, AbsDef)) - ContextCU->addDIEEntry(AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); + + return AbsDef; +} + +std::pair<DIE *, DwarfCompileUnit *> +DwarfCompileUnit::getOrCreateAbstractSubprogramContextDIE( + const DISubprogram *SP) { + bool Minimal = includeMinimalInlineScopes(); + bool IgnoreScope = shouldPlaceInUnitDIE(SP, Minimal); + DIE *ContextDIE = getOrCreateSubprogramContextDIE(SP, IgnoreScope); + + if (auto *SPDecl = SP->getDeclaration()) + if (!Minimal) + getOrCreateSubprogramDIE(SPDecl, nullptr); + + // The scope may be shared with a subprogram that has already been + // constructed in another CU, in which case we need to construct this + // subprogram in the same CU. + auto *ContextCU = IgnoreScope ? this : DD->lookupCU(ContextDIE->getUnitDie()); + + return std::make_pair(ContextDIE, ContextCU); +} + +void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( + LexicalScope *Scope) { + auto *SP = cast<DISubprogram>(Scope->getScopeNode()); + + // Populate subprogram DIE only once. + if (!getFinalizedAbstractSubprograms().insert(SP).second) + return; + + auto [ContextDIE, ContextCU] = getOrCreateAbstractSubprogramContextDIE(SP); + DIE *AbsDef = getAbstractScopeDIEs().lookup(SP); + if (!AbsDef) + AbsDef = &createAbstractSubprogramDIE(SP, ContextDIE, ContextCU); + + if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef)) + ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, + *ObjectPointer); } bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const { @@ -1293,9 +1317,9 @@ DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const { } DIE &DwarfCompileUnit::constructCallSiteEntryDIE( - DIE &ScopeDIE, const DISubprogram *CalleeSP, bool IsTail, - const MCSymbol *PCAddr, const MCSymbol *CallAddr, unsigned CallReg, - DIType *AllocSiteTy) { + DIE &ScopeDIE, const DISubprogram *CalleeSP, const Function *CalleeF, + bool IsTail, const MCSymbol *PCAddr, const MCSymbol *CallAddr, + unsigned CallReg, DIType *AllocSiteTy) { // Insert a call site entry DIE within ScopeDIE. DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site), ScopeDIE, nullptr); @@ -1305,7 +1329,7 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE( addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target), MachineLocation(CallReg)); } else if (CalleeSP) { - DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); + DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP, CalleeF); assert(CalleeDIE && "Could not create DIE for call site entry origin"); if (AddLinkageNamesToDeclCallOriginsForTuning(DD) && !CalleeSP->isDefinition() && @@ -1396,7 +1420,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE( if (auto *AbsSPDie = getAbstractScopeDIEs().lookup(SP)) EntityDie = AbsSPDie; else - EntityDie = getOrCreateSubprogramDIE(SP); + EntityDie = getOrCreateSubprogramDIE(SP, nullptr); } else if (auto *T = dyn_cast<DIType>(Entity)) EntityDie = getOrCreateTypeDIE(T); else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity)) @@ -1805,3 +1829,20 @@ DIE *DwarfCompileUnit::getOrCreateContextDIE(const DIScope *Context) { } return DwarfUnit::getOrCreateContextDIE(Context); } + +DIE *DwarfCompileUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, + const Function *F, + bool Minimal) { + if (!F && SP->isDefinition()) { + F = DD->getLexicalScopes().getFunction(SP); + + if (!F) { + // SP may belong to another CU. Determine the CU similarly + // to DwarfDebug::constructAbstractSubprogramScopeDIE. + return &DD->getOrCreateAbstractSubprogramCU(SP, *this) + .getOrCreateAbstractSubprogramDIE(SP); + } + } + + return DwarfUnit::getOrCreateSubprogramDIE(SP, F, Minimal); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index c2f6ca0..a3bbc83 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -81,6 +81,7 @@ class DwarfCompileUnit final : public DwarfUnit { // List of abstract local scopes (either DISubprogram or DILexicalBlock). DenseMap<const DILocalScope *, DIE *> AbstractLocalScopeDIEs; + SmallPtrSet<const DISubprogram *, 8> FinalizedAbstractSubprograms; // List of inlined lexical block scopes that belong to subprograms within this // CU. @@ -137,12 +138,28 @@ class DwarfCompileUnit final : public DwarfUnit { return DU->getAbstractEntities(); } + auto &getFinalizedAbstractSubprograms() { + if (isDwoUnit() && !DD->shareAcrossDWOCUs()) + return FinalizedAbstractSubprograms; + return DU->getFinalizedAbstractSubprograms(); + } + void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override; /// Add info for Wasm-global-based relocation. void addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName, uint64_t GlobalIndex); + /// Create context DIE for abstract subprogram. + /// \returns The context DIE and the compile unit where abstract + /// DIE should be constructed. + std::pair<DIE *, DwarfCompileUnit *> + getOrCreateAbstractSubprogramContextDIE(const DISubprogram *SP); + + /// Create new DIE for abstract subprogram. + DIE &createAbstractSubprogramDIE(const DISubprogram *SP, DIE *ContextDIE, + DwarfCompileUnit *ContextCU); + public: DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU, @@ -216,7 +233,8 @@ public: /// DW_AT_low_pc, DW_AT_high_pc and DW_AT_LLVM_stmt_sequence attributes. /// If there are global variables in this scope then create and insert DIEs /// for these variables. - DIE &updateSubprogramScopeDIE(const DISubprogram *SP, MCSymbol *LineTableSym); + DIE &updateSubprogramScopeDIE(const DISubprogram *SP, const Function &F, + MCSymbol *LineTableSym); void constructScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE); @@ -259,12 +277,18 @@ public: /// This instance of 'getOrCreateContextDIE()' can handle DILocalScope. DIE *getOrCreateContextDIE(const DIScope *Ty) override; + DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, const Function *F, + bool Minimal = false) override; + /// Construct a DIE for this subprogram scope. - DIE &constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope, - MCSymbol *LineTableSym); + DIE &constructSubprogramScopeDIE(const DISubprogram *Sub, const Function &F, + LexicalScope *Scope, MCSymbol *LineTableSym); DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE); + /// Create an abstract subprogram DIE, that should later be populated + /// by \ref constructAbstractSubprogramScopeDIE. + DIE &getOrCreateAbstractSubprogramDIE(const DISubprogram *SP); void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); /// Whether to use the GNU analog for a DWARF5 tag, attribute, or location @@ -281,14 +305,15 @@ public: dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const; /// Construct a call site entry DIE describing a call within \p Scope to a - /// callee described by \p CalleeSP. + /// callee described by \p CalleeSP and \p CalleeF. /// \p IsTail specifies whether the call is a tail call. /// \p PCAddr points to the PC value after the call instruction. /// \p CallAddr points to the PC value at the call instruction (or is null). /// \p CallReg is a register location for an indirect call. For direct calls /// the \p CallReg is set to 0. DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP, - bool IsTail, const MCSymbol *PCAddr, + const Function *CalleeF, bool IsTail, + const MCSymbol *PCAddr, const MCSymbol *CallAddr, unsigned CallReg, DIType *AllocSiteTy); /// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 05d3d18..09d5f9c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -548,6 +548,16 @@ bool DwarfDebug::shareAcrossDWOCUs() const { return SplitDwarfCrossCuReferences; } +DwarfCompileUnit & +DwarfDebug::getOrCreateAbstractSubprogramCU(const DISubprogram *SP, + DwarfCompileUnit &SrcCU) { + auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); + if (CU.getSkeleton()) + return shareAcrossDWOCUs() ? CU : SrcCU; + + return CU; +} + void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope) { assert(Scope && Scope->getScopeNode()); @@ -559,14 +569,11 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); - if (auto *SkelCU = CU.getSkeleton()) { - (shareAcrossDWOCUs() ? CU : SrcCU) - .constructAbstractSubprogramScopeDIE(Scope); + auto &TargetCU = getOrCreateAbstractSubprogramCU(SP, SrcCU); + TargetCU.constructAbstractSubprogramScopeDIE(Scope); + if (auto *SkelCU = CU.getSkeleton()) if (CU.getCUNode()->getSplitDebugInlining()) SkelCU->constructAbstractSubprogramScopeDIE(Scope); - } else { - CU.constructAbstractSubprogramScopeDIE(Scope); - } } /// Represents a parameter whose call site value can be described by applying a @@ -997,8 +1004,9 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, ->getName(CallReg))) << (IsTail ? " [IsTail]" : "") << "\n"); - DIE &CallSiteDIE = CU.constructCallSiteEntryDIE( - ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr, CallReg, AllocSiteTy); + DIE &CallSiteDIE = + CU.constructCallSiteEntryDIE(ScopeDIE, CalleeSP, CalleeDecl, IsTail, + PCAddr, CallAddr, CallReg, AllocSiteTy); // Optionally emit call-site-param debug info. if (emitDebugEntryValues()) { @@ -2707,7 +2715,8 @@ void DwarfDebug::skippedNonDebugFunction() { // Gather and emit post-function debug information. void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { - const DISubprogram *SP = MF->getFunction().getSubprogram(); + const Function &F = MF->getFunction(); + const DISubprogram *SP = F.getSubprogram(); assert(CurFn == MF && "endFunction should be called with the same function as beginFunction"); @@ -2776,11 +2785,12 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { ProcessedSPNodes.insert(SP); DIE &ScopeDIE = - TheCU.constructSubprogramScopeDIE(SP, FnScope, FunctionLineTableLabel); + TheCU.constructSubprogramScopeDIE(SP, F, FnScope, FunctionLineTableLabel); if (auto *SkelCU = TheCU.getSkeleton()) if (!LScopes.getAbstractScopesList().empty() && TheCU.getCUNode()->getSplitDebugInlining()) - SkelCU->constructSubprogramScopeDIE(SP, FnScope, FunctionLineTableLabel); + SkelCU->constructSubprogramScopeDIE(SP, F, FnScope, + FunctionLineTableLabel); FunctionLineTableLabel = nullptr; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 89813dc..1a1b28a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -906,6 +906,10 @@ public: return CUDieMap.lookup(Die); } + /// Find the matching DwarfCompileUnit for the given SP referenced from SrcCU. + DwarfCompileUnit &getOrCreateAbstractSubprogramCU(const DISubprogram *SP, + DwarfCompileUnit &SrcCU); + unsigned getStringTypeLoc(const DIStringType *ST) const { return StringTypeLocMap.lookup(ST); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index 0fc2b91..ef1524d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -11,6 +11,7 @@ #include "DwarfStringPool.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DIE.h" @@ -27,6 +28,7 @@ class DbgVariable; class DbgLabel; class DINode; class DILocalScope; +class DISubprogram; class DwarfCompileUnit; class DwarfUnit; class LexicalScope; @@ -94,6 +96,9 @@ class DwarfFile { // Collection of abstract subprogram DIEs. DenseMap<const DILocalScope *, DIE *> AbstractLocalScopeDIEs; DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities; + /// Keeps track of abstract subprograms to populate them only once. + // FIXME: merge creation and population of abstract scopes. + SmallPtrSet<const DISubprogram *, 8> FinalizedAbstractSubprograms; /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can /// be shared across CUs, that is why we keep the map here instead @@ -174,6 +179,10 @@ public: return AbstractEntities; } + auto &getFinalizedAbstractSubprograms() { + return FinalizedAbstractSubprograms; + } + void insertDIE(const MDNode *TypeMD, DIE *Die) { DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index d76fd0c..62fb5eb 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -573,7 +573,7 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) { if (auto *NS = dyn_cast<DINamespace>(Context)) return getOrCreateNameSpace(NS); if (auto *SP = dyn_cast<DISubprogram>(Context)) - return getOrCreateSubprogramDIE(SP); + return getOrCreateSubprogramDIE(SP, nullptr); if (auto *M = dyn_cast<DIModule>(Context)) return getOrCreateModule(M); return getDIE(Context); @@ -1066,7 +1066,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (!Element) continue; if (auto *SP = dyn_cast<DISubprogram>(Element)) - getOrCreateSubprogramDIE(SP); + getOrCreateSubprogramDIE(SP, nullptr); else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) { if (DDTy->getTag() == dwarf::DW_TAG_friend) { DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); @@ -1335,22 +1335,21 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) { return &MDie; } -DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) { +DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, + const Function *FnHint, bool Minimal) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE (as is the case for member function // declarations). DIE *ContextDIE = - Minimal ? &getUnitDie() : getOrCreateContextDIE(SP->getScope()); + getOrCreateSubprogramContextDIE(SP, shouldPlaceInUnitDIE(SP, Minimal)); if (DIE *SPDie = getDIE(SP)) return SPDie; if (auto *SPDecl = SP->getDeclaration()) { if (!Minimal) { - // Add subprogram definitions to the CU die directly. - ContextDIE = &getUnitDie(); // Build the decl now to ensure it precedes the definition. - getOrCreateSubprogramDIE(SPDecl); + getOrCreateSubprogramDIE(SPDecl, nullptr); // Check whether the DIE for SP has already been created after the call // above. // FIXME: Should the creation of definition subprogram DIE during diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index fe05766..bb00ec3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -256,7 +256,9 @@ public: DIE *getOrCreateNameSpace(const DINamespace *NS); DIE *getOrCreateModule(const DIModule *M); - DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false); + virtual DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, + const Function *FnHint, + bool Minimal = false); void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, bool SkipSPAttributes = false); @@ -343,6 +345,18 @@ protected: /// Emit the common part of the header for this unit. void emitCommonHeader(bool UseOffsets, dwarf::UnitType UT); + bool shouldPlaceInUnitDIE(const DISubprogram *SP, bool Minimal) { + // Add subprogram declarations to the CU die directly. + return Minimal || SP->getDeclaration(); + } + + DIE *getOrCreateSubprogramContextDIE(const DISubprogram *SP, + bool IgnoreScope) { + if (IgnoreScope) + return &getUnitDie(); + return getOrCreateContextDIE(SP->getScope()); + } + private: /// A helper to add a wide integer constant to a DIE using a block /// form. diff --git a/llvm/lib/CodeGen/LexicalScopes.cpp b/llvm/lib/CodeGen/LexicalScopes.cpp index 5916f61..9fc9ac9 100644 --- a/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/llvm/lib/CodeGen/LexicalScopes.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -36,8 +37,16 @@ using namespace llvm; #define DEBUG_TYPE "lexicalscopes" -/// reset - Reset the instance so that it's prepared for another function. -void LexicalScopes::reset() { +static bool skipUnit(const DICompileUnit *CU) { + return CU->getEmissionKind() == DICompileUnit::NoDebug; +} + +void LexicalScopes::resetModule() { + FunctionMap.clear(); + resetFunction(); +} + +void LexicalScopes::resetFunction() { MF = nullptr; CurrentFnLexicalScope = nullptr; LexicalScopeMap.clear(); @@ -47,12 +56,19 @@ void LexicalScopes::reset() { DominatedBlocks.clear(); } -/// initialize - Scan machine function and constuct lexical scope nest. -void LexicalScopes::initialize(const MachineFunction &Fn) { - reset(); +void LexicalScopes::initialize(const Module &M) { + resetModule(); + for (const Function &F : M) { + DISubprogram *SP = F.getSubprogram(); + if (SP && (!SP->getUnit() || !skipUnit(SP->getUnit()))) + FunctionMap[SP] = &F; + } +} + +void LexicalScopes::scanFunction(const MachineFunction &Fn) { + resetFunction(); // Don't attempt any lexical scope creation for a NoDebug compile unit. - if (Fn.getFunction().getSubprogram()->getUnit()->getEmissionKind() == - DICompileUnit::NoDebug) + if (skipUnit(Fn.getFunction().getSubprogram()->getUnit())) return; MF = &Fn; SmallVector<InsnRange, 4> MIRanges; @@ -143,8 +159,7 @@ LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope, const DILocation *IA) { if (IA) { // Skip scopes inlined from a NoDebug compile unit. - if (Scope->getSubprogram()->getUnit()->getEmissionKind() == - DICompileUnit::NoDebug) + if (skipUnit(Scope->getSubprogram()->getUnit())) return getOrCreateLexicalScope(IA); // Create an abstract scope for inlined function. getOrCreateAbstractScope(Scope); diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index a8143bd..0037bdd 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -3721,7 +3721,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, TFI = MF.getSubtarget().getFrameLowering(); TFI->getCalleeSaves(MF, CalleeSavedRegs); MFI = &MF.getFrameInfo(); - LS.initialize(MF); + LS.scanFunction(MF); const auto &STI = MF.getSubtarget(); AdjustsStackInCalls = MFI->adjustsStack() && diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index 82e0c28..b9ea03f 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -2231,7 +2231,7 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TFI->getCalleeSaves(MF, CalleeSavedRegs); this->ShouldEmitDebugEntryValues = ShouldEmitDebugEntryValues; - LS.initialize(MF); + LS.scanFunction(MF); bool Changed = false; bool OLChanged = false; diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 84e9f03..001ba52 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -1263,7 +1263,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, void LiveDebugVariables::LDVImpl::computeIntervals() { LexicalScopes LS; - LS.initialize(*MF); + LS.scanFunction(*MF); for (const auto &UV : userValues) { UV->computeIntervals(MF->getRegInfo(), *TRI, *LIS, LS); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index e911ce8..1154855 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1549,7 +1549,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { report("G_BUILD_VECTOR result element type must match source type", MI); if (DstTy.getNumElements() != MI->getNumOperands() - 1) - report("G_BUILD_VECTOR must have an operand for each elemement", MI); + report("G_BUILD_VECTOR must have an operand for each element", MI); for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2)) if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg())) @@ -2398,11 +2398,11 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // The next two checks allow COPY between physical and virtual registers, // when the virtual register has a scalable size and the physical register - // has a fixed size. These checks allow COPY between *potentialy* mismatched - // sizes. However, once RegisterBankSelection occurs, MachineVerifier should - // be able to resolve a fixed size for the scalable vector, and at that - // point this function will know for sure whether the sizes are mismatched - // and correctly report a size mismatch. + // has a fixed size. These checks allow COPY between *potentially* + // mismatched sizes. However, once RegisterBankSelection occurs, + // MachineVerifier should be able to resolve a fixed size for the scalable + // vector, and at that point this function will know for sure whether the + // sizes are mismatched and correctly report a size mismatch. if (SrcReg.isPhysical() && DstReg.isVirtual() && DstSize.isScalable() && !SrcSize.isScalable()) break; @@ -3213,13 +3213,13 @@ struct VRegFilter { private: static constexpr unsigned SparseUniverseMax = 10 * 1024 * 8; - // VRegs indexed within SparseUniverseMax are tracked by Sparse, those beyound - // are tracked by Dense. The only purpose of the threashold and the Dense set + // VRegs indexed within SparseUniverseMax are tracked by Sparse, those beyond + // are tracked by Dense. The only purpose of the threshold and the Dense set // is to have a reasonably growing memory usage in pathological cases (large // number of very sparse VRegFilter instances live at the same time). In // practice even in the worst-by-execution time cases having all elements // tracked by Sparse (very large SparseUniverseMax scenario) tends to be more - // space efficient than if tracked by Dense. The threashold is set to keep the + // space efficient than if tracked by Dense. The threshold is set to keep the // worst-case memory usage within 2x of figures determined empirically for // "all Dense" scenario in such worst-by-execution-time cases. BitVector Sparse; @@ -3459,7 +3459,7 @@ void MachineVerifier::visitMachineFunctionAfter() { // Check live-in list of each MBB. If a register is live into MBB, check // that the register is in regsLiveOut of each predecessor block. Since - // this must come from a definition in the predecesssor or its live-in + // this must come from a definition in the predecessor or its live-in // list, this will catch a live-through case where the predecessor does not // have the register in its live-in list. This currently only checks // registers that have no aliases, are not allocatable and are not diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c815686..77df4b4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17983,8 +17983,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) { - if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath || - Flags.hasNoSignedZeros()) { + if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) { return N0; } } @@ -17997,8 +17996,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub -0.0, N1) -> -N1 if (N0CFP && N0CFP->isZero()) { - if (N0CFP->isNegative() || - (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { + if (N0CFP->isNegative() || Flags.hasNoSignedZeros()) { // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are // flushed to zero, unless all users treat denorms as zero (DAZ). // FIXME: This transform will change the sign of a NaN and the behavior @@ -18014,8 +18012,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } - if ((Options.NoSignedZerosFPMath || - (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && + if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() && N1.getOpcode() == ISD::FADD) { // X - (X + Y) -> -Y if (N0 == N1->getOperand(0)) diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index daebf447..dd83168 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2847,6 +2847,7 @@ unsigned CastInst::isEliminableCastPair( // FPTRUNC > FloatPt n/a FloatPt n/a // FPEXT < FloatPt n/a FloatPt n/a // PTRTOINT n/a Pointer n/a Integral Unsigned + // PTRTOADDR n/a Pointer n/a Integral Unsigned // INTTOPTR n/a Integral Unsigned Pointer n/a // BITCAST = FirstClass n/a FirstClass n/a // ADDRSPCST n/a Pointer n/a Pointer n/a @@ -2878,7 +2879,7 @@ unsigned CastInst::isEliminableCastPair( { 99,99,99, 2, 2,99,99, 8, 2,99,99,99, 4, 0}, // FPExt | { 1, 0, 0,99,99, 0, 0,99,99,99,99, 7, 3, 0}, // PtrToInt | { 1, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // PtrToAddr | - { 99,99,99,99,99,99,99,99,99,11,99,99,15, 0}, // IntToPtr | + { 99,99,99,99,99,99,99,99,99,11,11,99,15, 0}, // IntToPtr | { 5, 5, 5, 0, 0, 5, 5, 0, 0,16,16, 5, 1,14}, // BitCast | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+ }; @@ -2972,7 +2973,8 @@ unsigned CastInst::isEliminableCastPair( // zext, sext -> zext, because sext can't sign extend after zext return Instruction::ZExt; case 11: { - // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize + // inttoptr, ptrtoint/ptrtoaddr -> bitcast if SrcSize<=PtrSize and + // SrcSize==DstSize if (!MidIntPtrTy) return 0; unsigned PtrSize = MidIntPtrTy->getScalarSizeInBits(); diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index 5827292..99029c1 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -252,6 +252,13 @@ void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName) { MDB.createString(PassName)})); } +void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F, + StringRef PassName) { + if (std::optional<Function::ProfileCount> EC = F.getEntryCount(); + EC && EC->getCount() > 0) + setExplicitlyUnknownBranchWeights(I, PassName); +} + void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName) { MDBuilder MDB(F.getContext()); F.setMetadata( diff --git a/llvm/lib/Object/OffloadBundle.cpp b/llvm/lib/Object/OffloadBundle.cpp index 1e1042c..0dd378e 100644 --- a/llvm/lib/Object/OffloadBundle.cpp +++ b/llvm/lib/Object/OffloadBundle.cpp @@ -89,17 +89,17 @@ Error OffloadBundleFatBin::readEntries(StringRef Buffer, uint64_t EntryIDSize; StringRef EntryID; - if (auto EC = Reader.readInteger(EntryOffset)) - return errorCodeToError(object_error::parse_failed); + if (Error Err = Reader.readInteger(EntryOffset)) + return Err; - if (auto EC = Reader.readInteger(EntrySize)) - return errorCodeToError(object_error::parse_failed); + if (Error Err = Reader.readInteger(EntrySize)) + return Err; - if (auto EC = Reader.readInteger(EntryIDSize)) - return errorCodeToError(object_error::parse_failed); + if (Error Err = Reader.readInteger(EntryIDSize)) + return Err; - if (auto EC = Reader.readFixedString(EntryID, EntryIDSize)) - return errorCodeToError(object_error::parse_failed); + if (Error Err = Reader.readFixedString(EntryID, EntryIDSize)) + return Err; auto Entry = std::make_unique<OffloadBundleEntry>( EntryOffset + SectionOffset, EntrySize, EntryIDSize, EntryID); @@ -125,7 +125,7 @@ OffloadBundleFatBin::create(MemoryBufferRef Buf, uint64_t SectionOffset, // Read the Bundle Entries Error Err = TheBundle->readEntries(Buf.getBuffer(), SectionOffset); if (Err) - return errorCodeToError(object_error::parse_failed); + return Err; return std::unique_ptr<OffloadBundleFatBin>(TheBundle); } diff --git a/llvm/lib/Support/FileCollector.cpp b/llvm/lib/Support/FileCollector.cpp index 5dc224a..1e5de2c 100644 --- a/llvm/lib/Support/FileCollector.cpp +++ b/llvm/lib/Support/FileCollector.cpp @@ -68,9 +68,8 @@ void FileCollector::PathCanonicalizer::updateWithRealPath( SmallString<256> RealPath; auto DirWithSymlink = CachedDirs.find(Directory); if (DirWithSymlink == CachedDirs.end()) { - // FIXME: Should this be a call to FileSystem::getRealpath(), in some - // cases? What if there is nothing on disk? - if (sys::fs::real_path(Directory, RealPath)) + // FIXME: What if there is nothing on disk? + if (VFS->getRealPath(Directory, RealPath)) return; CachedDirs[Directory] = std::string(RealPath); } else { diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp index 686688a..8da6fdb 100644 --- a/llvm/lib/Support/Mustache.cpp +++ b/llvm/lib/Support/Mustache.cpp @@ -282,18 +282,15 @@ void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) { // For example: // The template string // " \t{{#section}}A{{/section}}" -// would be considered as having no text ahead and would be render as +// would be considered as having no text ahead and would be render as: // "A" -// The exception for this is partial tag which requires us to -// keep track of the indentation once it's rendered. void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx, Token &CurrentToken, Token::Type CurrentType) { Token &PrevToken = Tokens[Idx - 1]; StringRef PrevTokenBody = PrevToken.TokenBody; StringRef Unindented = PrevTokenBody.rtrim(" \r\t\v"); size_t Indentation = PrevTokenBody.size() - Unindented.size(); - if (CurrentType != Token::Type::Partial) - PrevToken.TokenBody = Unindented.str(); + PrevToken.TokenBody = Unindented.str(); CurrentToken.setIndentation(Indentation); } @@ -439,7 +436,8 @@ class AddIndentationStringStream : public raw_ostream { public: explicit AddIndentationStringStream(llvm::raw_ostream &WrappedStream, size_t Indentation) - : Indentation(Indentation), WrappedStream(WrappedStream) { + : Indentation(Indentation), WrappedStream(WrappedStream), + NeedsIndent(true) { SetUnbuffered(); } @@ -448,10 +446,15 @@ protected: llvm::StringRef Data(Ptr, Size); SmallString<0> Indent; Indent.resize(Indentation, ' '); + for (char C : Data) { + if (NeedsIndent && C != '\n') { + WrappedStream << Indent; + NeedsIndent = false; + } WrappedStream << C; if (C == '\n') - WrappedStream << Indent; + NeedsIndent = true; } } @@ -460,6 +463,7 @@ protected: private: size_t Indentation; llvm::raw_ostream &WrappedStream; + bool NeedsIndent; }; class Parser { diff --git a/llvm/lib/Support/SipHash.cpp b/llvm/lib/Support/SipHash.cpp index 86dad66..382d36f 100644 --- a/llvm/lib/Support/SipHash.cpp +++ b/llvm/lib/Support/SipHash.cpp @@ -35,14 +35,19 @@ void llvm::getSipHash_2_4_128(ArrayRef<uint8_t> In, const uint8_t (&K)[16], siphash<2, 4>(In.data(), In.size(), K, Out); } -/// Compute an ABI-stable 16-bit hash of the given string. -uint16_t llvm::getPointerAuthStableSipHash(StringRef Str) { +/// Compute an ABI-stable 64-bit hash of the given string. +uint64_t llvm::getStableSipHash(StringRef Str) { static const uint8_t K[16] = {0xb5, 0xd4, 0xc9, 0xeb, 0x79, 0x10, 0x4a, 0x79, 0x6f, 0xec, 0x8b, 0x1b, 0x42, 0x87, 0x81, 0xd4}; uint8_t RawHashBytes[8]; getSipHash_2_4_64(arrayRefFromStringRef(Str), K, RawHashBytes); - uint64_t RawHash = endian::read64le(RawHashBytes); + return endian::read64le(RawHashBytes); +} + +/// Compute an ABI-stable 16-bit hash of the given string. +uint16_t llvm::getPointerAuthStableSipHash(StringRef Str) { + uint64_t RawHash = getStableSipHash(Str); // Produce a non-zero 16-bit discriminator. uint16_t Discriminator = (RawHash % 0xFFFF) + 1; diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index cf78459..7ff62d4 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -1973,7 +1973,7 @@ private: EC = FS->makeAbsolute(FullPath, Name); Name = canonicalize(Name); } else { - EC = sys::fs::make_absolute(Name); + EC = FS->makeAbsolute(Name); } if (EC) { assert(NameValueNode && "Name presence should be checked earlier"); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp index 38718c4..7504f1a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp @@ -150,7 +150,10 @@ public: if (!CVisited.insert(CII).second) continue; - if (CII->getParent() == II->getParent() && !IsLookThru(II)) + // Same-BB filter must look at the *user*; and allow non-lookthrough + // users when the def is a PHI (loop-header pattern). + if (CII->getParent() == II->getParent() && !IsLookThru(CII) && + !isa<PHINode>(II)) continue; if (isOpLegal(CII)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp index d9bfeae..0a59132 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp @@ -2562,7 +2562,9 @@ bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) { for (Function *F : NeedsPostProcess) Splitter.processFunction(*F); for (Function *F : Intrinsics) { - if (isRemovablePointerIntrinsic(F->getIntrinsicID())) { + // use_empty() can also occur with cases like masked load, which will + // have been rewritten out of the module by now but not erased. + if (F->use_empty() || isRemovablePointerIntrinsic(F->getIntrinsicID())) { F->eraseFromParent(); } else { std::optional<Function *> NewF = Intrinsic::remangleIntrinsicFunction(F); diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index cb57c43..d4d9e54 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -193,7 +193,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, // we need to invert the branch condition to jump over TrueBB when the // condition is false. auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); - CC = RISCVCC::getOppositeBranchCondition(CC); + CC = RISCVCC::getInverseBranchCondition(CC); // Insert branch instruction. BuildMI(MBB, MBBI, DL, TII->get(RISCVCC::getBrCond(CC))) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 56db09a..6d418fd 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1134,7 +1134,7 @@ unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, unsigned SelectOpc) { } } -RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) { +RISCVCC::CondCode RISCVCC::getInverseBranchCondition(RISCVCC::CondCode CC) { switch (CC) { default: llvm_unreachable("Unrecognized conditional branch"); @@ -1554,7 +1554,7 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const { return Register(); }; - unsigned NewOpc = RISCVCC::getBrCond(getOppositeBranchCondition(CC)); + unsigned NewOpc = RISCVCC::getBrCond(getInverseBranchCondition(CC)); // Might be case 1. // Don't change 0 to 1 since we can use x0. @@ -1801,7 +1801,7 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI, // Add condition code, inverting if necessary. auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); if (Invert) - CC = RISCVCC::getOppositeBranchCondition(CC); + CC = RISCVCC::getInverseBranchCondition(CC); NewMI.addImm(CC); // Copy the false register. @@ -3978,7 +3978,7 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, case RISCV::PseudoCCMOVGPR: { // CCMOV can be commuted by inverting the condition. auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); - CC = RISCVCC::getOppositeBranchCondition(CC); + CC = RISCVCC::getInverseBranchCondition(CC); auto &WorkingMI = cloneIfNew(MI); WorkingMI.getOperand(3).setImm(CC); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 2bc499b..42a0c4c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -44,7 +44,7 @@ enum CondCode { COND_INVALID }; -CondCode getOppositeBranchCondition(CondCode); +CondCode getInverseBranchCondition(CondCode); unsigned getBrCond(CondCode CC, unsigned SelectOpc = 0); } // end of namespace RISCVCC diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 563f3bb..d4124ae 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -167,6 +167,42 @@ static bool canUseShiftPair(Instruction *Inst, const APInt &Imm) { return false; } +// If this is i64 AND is part of (X & -(1 << C1) & 0xffffffff) == C2 << C1), +// DAGCombiner can convert this to (sraiw X, C1) == sext(C2) for RV64. On RV32, +// the type will be split so only the lower 32 bits need to be compared using +// (srai/srli X, C) == C2. +static bool canUseShiftCmp(Instruction *Inst, const APInt &Imm) { + if (!Inst->hasOneUse()) + return false; + + // Look for equality comparison. + auto *Cmp = dyn_cast<ICmpInst>(*Inst->user_begin()); + if (!Cmp || !Cmp->isEquality()) + return false; + + // Right hand side of comparison should be a constant. + auto *C = dyn_cast<ConstantInt>(Cmp->getOperand(1)); + if (!C) + return false; + + uint64_t Mask = Imm.getZExtValue(); + + // Mask should be of the form -(1 << C) in the lower 32 bits. + if (!isUInt<32>(Mask) || !isPowerOf2_32(-uint32_t(Mask))) + return false; + + // Comparison constant should be a subset of Mask. + uint64_t CmpC = C->getZExtValue(); + if ((CmpC & Mask) != CmpC) + return false; + + // We'll need to sign extend the comparison constant and shift it right. Make + // sure the new constant can use addi/xori+seqz/snez. + unsigned ShiftBits = llvm::countr_zero(Mask); + int64_t NewCmpC = SignExtend64<32>(CmpC) >> ShiftBits; + return NewCmpC >= -2048 && NewCmpC <= 2048; +} + InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, @@ -224,6 +260,9 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() && canUseShiftPair(Inst, Imm)) return TTI::TCC_Free; + if (Inst && Idx == 1 && Imm.getBitWidth() == 64 && + canUseShiftCmp(Inst, Imm)) + return TTI::TCC_Free; Takes12BitImm = true; break; case Instruction::Add: diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp index b4fc8da..db85e33 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp @@ -587,7 +587,8 @@ bool SPIRVLegalizerInfo::legalizeIsFPClass( } if (FPClassTest PartialCheck = Mask & fcNan) { - auto InfWithQnanBitC = buildSPIRVConstant(IntTy, Inf | QNaNBitMask); + auto InfWithQnanBitC = + buildSPIRVConstant(IntTy, std::move(Inf) | QNaNBitMask); if (PartialCheck == fcNan) { // isnan(V) ==> abs(V) u> int(inf) appendToRes( @@ -613,7 +614,7 @@ bool SPIRVLegalizerInfo::legalizeIsFPClass( APInt ExpLSB = ExpMask & ~(ExpMask.shl(1)); auto ExpMinusOne = assignSPIRVTy( MIRBuilder.buildSub(IntTy, Abs, buildSPIRVConstant(IntTy, ExpLSB))); - APInt MaxExpMinusOne = ExpMask - ExpLSB; + APInt MaxExpMinusOne = std::move(ExpMask) - ExpLSB; auto NormalRes = assignSPIRVTy( MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne, buildSPIRVConstant(IntTy, MaxExpMinusOne))); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e7eb67a..cd04ff5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31215,16 +31215,16 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, unsigned NumElts = VT.getVectorNumElements(); if (Subtarget.hasVBMI2() && EltSizeInBits > 8) { - if (IsFSHR) - std::swap(Op0, Op1); if (IsCstSplat) { + if (IsFSHR) + std::swap(Op0, Op1); uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits); SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8); return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, {Op0, Op1, Imm}, DAG, Subtarget); } - return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT, + return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT, {Op0, Op1, Amt}, DAG, Subtarget); } assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 || @@ -35139,8 +35139,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VALIGN) NODE_NAME_CASE(VSHLD) NODE_NAME_CASE(VSHRD) - NODE_NAME_CASE(VSHLDV) - NODE_NAME_CASE(VSHRDV) NODE_NAME_CASE(PSHUFD) NODE_NAME_CASE(PSHUFHW) NODE_NAME_CASE(PSHUFLW) @@ -45171,6 +45169,7 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode( case X86ISD::Wrapper: case X86ISD::WrapperRIP: return true; + case X86ISD::INSERTPS: case X86ISD::BLENDI: case X86ISD::PSHUFB: case X86ISD::PSHUFD: @@ -45241,6 +45240,7 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode( case X86ISD::BLENDV: return false; // SSE target shuffles. + case X86ISD::INSERTPS: case X86ISD::PSHUFB: case X86ISD::PSHUFD: case X86ISD::UNPCKL: diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 8ab8c66..b55556a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -471,8 +471,7 @@ namespace llvm { // VBMI2 Concat & Shift. VSHLD, VSHRD, - VSHLDV, - VSHRDV, + // Shuffle Packed Values at 128-bit granularity. SHUF128, MOVDDUP, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 2371ed4..564810c 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -12300,72 +12300,76 @@ defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>; // VBMI2 //===----------------------------------------------------------------------===// -multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, +multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR, X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in { defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, "$src3, $src2", "$src2, $src3", - (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>, + !if(SwapLR, + (VTI.VT (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src3))), + (VTI.VT (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src3))))>, T8, PD, EVEX, VVVV, Sched<[sched]>; defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, "$src3, $src2", "$src2, $src3", - (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, - (VTI.VT (VTI.LdFrag addr:$src3))))>, + !if(SwapLR, + (VTI.VT (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.LdFrag addr:$src3)))), + (VTI.VT (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT (VTI.LdFrag addr:$src3)))))>, T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; } } -multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, +multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR, X86FoldableSchedWrite sched, X86VectorVTInfo VTI> - : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> { + : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched, VTI> { let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr, "${src3}"#VTI.BroadcastStr#", $src2", "$src2, ${src3}"#VTI.BroadcastStr, - (OpNode VTI.RC:$src1, VTI.RC:$src2, - (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, + !if(SwapLR, + (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.BroadcastLdFrag addr:$src3))), + (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT (VTI.BroadcastLdFrag addr:$src3))))>, T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } -multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, +multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { let Predicates = [HasVBMI2] in - defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, + defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.ZMM, VTI.info512>, EVEX_V512; let Predicates = [HasVBMI2, HasVLX] in { - defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>, + defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.YMM, VTI.info256>, EVEX_V256; - defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>, + defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.XMM, VTI.info128>, EVEX_V128; } } -multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, +multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { let Predicates = [HasVBMI2] in - defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, + defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.ZMM, VTI.info512>, EVEX_V512; let Predicates = [HasVBMI2, HasVLX] in { - defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>, + defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.YMM, VTI.info256>, EVEX_V256; - defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>, + defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.XMM, VTI.info128>, EVEX_V128; } } multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix, - SDNode OpNode, X86SchedWriteWidths sched> { - defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched, + SDNode OpNode, bit SwapLR, X86SchedWriteWidths sched> { + defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, SwapLR, sched, avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>; - defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched, + defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, SwapLR, sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; - defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched, + defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, SwapLR, sched, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; } @@ -12381,8 +12385,8 @@ multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix, } // Concat & Shift -defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>; -defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>; +defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", fshl, 0, SchedWriteVecIMul>; +defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", fshr, 1, SchedWriteVecIMul>; defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>; defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>; diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index b4768590..031fdc1 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -25,18 +25,12 @@ let SchedRW = [WriteLEA] in { [(set GR32:$dst, lea32addr:$src)]>, OpSize32, Requires<[Not64BitMode]>; - let Predicates = [HasNDD], isCodeGenOnly = 1 in { - def LEA64_8r : I<0x8D, MRMSrcMem, (outs GR8:$dst), (ins lea64_8mem:$src), - "lea{b}\t{$src|$dst}, {$dst|$src}", - [(set GR8:$dst, lea64_iaddr:$src)]>, - OpSize16, - Requires<[In64BitMode]>; - - def LEA64_16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins lea64_16mem:$src), - "lea{w}\t{$src|$dst}, {$dst|$src}", - [(set GR16:$dst, lea64_iaddr:$src)]>, - OpSize16, - Requires<[In64BitMode]>; + let isCodeGenOnly = 1 in { + def LEA64_8r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_8mem:$src), + "lea{l}\t{$src|$dst}, {$dst|$src}", []>, OpSize32; + + def LEA64_16r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_16mem:$src), + "lea{l}\t{$src|$dst}, {$dst|$src}", []>, OpSize32; } def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src), @@ -51,6 +45,11 @@ let SchedRW = [WriteLEA] in { [(set GR64:$dst, lea64addr:$src)]>; } // SchedRW +let Predicates = [HasNDD] in { + def : Pat<(i8 lea64_iaddr:$src), (EXTRACT_SUBREG (LEA64_8r lea64_8mem:$src), sub_8bit)>; + def : Pat<(i16 lea64_iaddr:$src), (EXTRACT_SUBREG (LEA64_16r lea64_16mem:$src), sub_16bit)>; +} + // Pseudo instruction for lea that prevent optimizer from eliminating // the instruction. let SchedRW = [WriteLEA], isPseudo = true, hasSideEffects = 1 in { diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 0c20ffe..5321ecf 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -406,16 +406,6 @@ def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; def X86VShld : SDNode<"X86ISD::VSHLD", SDTShuff3OpI>; def X86VShrd : SDNode<"X86ISD::VSHRD", SDTShuff3OpI>; -def X86VShldv : SDNode<"X86ISD::VSHLDV", - SDTypeProfile<1, 3, [SDTCisVec<0>, - SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, - SDTCisSameAs<0,3>]>>; -def X86VShrdv : SDNode<"X86ISD::VSHRDV", - SDTypeProfile<1, 3, [SDTCisVec<0>, - SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, - SDTCisSameAs<0,3>]>>; def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index d1ca0a6..59e103cd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -880,11 +880,11 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { // zext(bool) + C -> bool ? C + 1 : C if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) - return SelectInst::Create(X, InstCombiner::AddOne(Op1C), Op1); + return createSelectInst(X, InstCombiner::AddOne(Op1C), Op1); // sext(bool) + C -> bool ? C - 1 : C if (match(Op0, m_SExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) - return SelectInst::Create(X, InstCombiner::SubOne(Op1C), Op1); + return createSelectInst(X, InstCombiner::SubOne(Op1C), Op1); // ~X + C --> (C-1) - X if (match(Op0, m_Not(m_Value(X)))) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 7a979c1..4f94aa2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -23,6 +23,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Value.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" @@ -62,14 +63,14 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final public InstVisitor<InstCombinerImpl, Instruction *> { public: InstCombinerImpl(InstructionWorklist &Worklist, BuilderTy &Builder, - bool MinimizeSize, AAResults *AA, AssumptionCache &AC, + Function &F, AAResults *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, const DataLayout &DL, ReversePostOrderTraversal<BasicBlock *> &RPOT) - : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE, - BFI, BPI, PSI, DL, RPOT) {} + : InstCombiner(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI, BPI, + PSI, DL, RPOT) {} virtual ~InstCombinerImpl() = default; @@ -469,6 +470,17 @@ private: Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable, unsigned Depth = 0); + SelectInst *createSelectInst(Value *C, Value *S1, Value *S2, + const Twine &NameStr = "", + InsertPosition InsertBefore = nullptr, + Instruction *MDFrom = nullptr) { + SelectInst *SI = + SelectInst::Create(C, S1, S2, NameStr, InsertBefore, MDFrom); + if (!MDFrom) + setExplicitlyUnknownBranchWeightsIfProfiled(*SI, F, DEBUG_TYPE); + return SI; + } + public: /// Create and insert the idiom we use to indicate a block is unreachable /// without having to rewrite the CFG from within InstCombine. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 550f095..d457e0c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1253,7 +1253,7 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { // shl (zext i1 X), C1 --> select (X, 1 << C1, 0) if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1); - return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty)); + return createSelectInst(X, NewC, ConstantInt::getNullValue(Ty)); } } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index f0ddd5c..8fbaf68 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1735,7 +1735,7 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { Constant *Zero = ConstantInt::getNullValue(BO.getType()); Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C); Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C); - return SelectInst::Create(X, TVal, FVal); + return createSelectInst(X, TVal, FVal); } static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, @@ -5934,8 +5934,8 @@ static bool combineInstructionsOverFunction( LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); - InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, - ORE, BFI, BPI, PSI, DL, RPOT); + InstCombinerImpl IC(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI, + BPI, PSI, DL, RPOT); IC.MaxArraySizeForCombine = MaxArraySize; bool MadeChangeInThisIteration = IC.prepareWorklist(F); MadeChangeInThisIteration |= IC.run(); diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index e5bf2d1..d842275 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -35,6 +35,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation/CFGMST.h" #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" @@ -92,8 +93,10 @@ class GCOVFunction; class GCOVProfiler { public: - GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {} - GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {} + GCOVProfiler() + : GCOVProfiler(GCOVOptions::getDefault(), *vfs::getRealFileSystem()) {} + GCOVProfiler(const GCOVOptions &Opts, vfs::FileSystem &VFS) + : Options(Opts), VFS(VFS) {} bool runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI, function_ref<BranchProbabilityInfo *(Function &F)> GetBPI, @@ -110,6 +113,7 @@ public: os->write_zeros(4 - s.size() % 4); } void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); } + vfs::FileSystem &getVirtualFileSystem() const { return VFS; } private: // Create the .gcno files for the Module based on DebugInfo. @@ -166,6 +170,7 @@ private: std::vector<Regex> ExcludeRe; DenseSet<const BasicBlock *> ExecBlocks; StringMap<bool> InstrumentedFiles; + vfs::FileSystem &VFS; }; struct BBInfo { @@ -214,10 +219,10 @@ static StringRef getFunctionName(const DISubprogram *SP) { /// Prefer relative paths in the coverage notes. Clang also may split /// up absolute paths into a directory and filename component. When /// the relative path doesn't exist, reconstruct the absolute path. -static SmallString<128> getFilename(const DIScope *SP) { +static SmallString<128> getFilename(const DIScope *SP, vfs::FileSystem &VFS) { SmallString<128> Path; StringRef RelPath = SP->getFilename(); - if (sys::fs::exists(RelPath)) + if (VFS.exists(RelPath)) Path = RelPath; else sys::path::append(Path, SP->getDirectory(), SP->getFilename()); @@ -357,7 +362,7 @@ namespace { void writeOut(uint32_t CfgChecksum) { write(GCOV_TAG_FUNCTION); - SmallString<128> Filename = getFilename(SP); + SmallString<128> Filename = getFilename(SP, P->getVirtualFileSystem()); uint32_t BlockLen = 3 + wordsOfString(getFunctionName(SP)); BlockLen += 1 + wordsOfString(Filename) + 4; @@ -455,7 +460,7 @@ bool GCOVProfiler::isFunctionInstrumented(const Function &F) { if (FilterRe.empty() && ExcludeRe.empty()) { return true; } - SmallString<128> Filename = getFilename(F.getSubprogram()); + SmallString<128> Filename = getFilename(F.getSubprogram(), VFS); auto It = InstrumentedFiles.find(Filename); if (It != InstrumentedFiles.end()) { return It->second; @@ -467,7 +472,7 @@ bool GCOVProfiler::isFunctionInstrumented(const Function &F) { // Path can be // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for // such a case we must get the real_path. - if (sys::fs::real_path(Filename, RealPath)) { + if (VFS.getRealPath(Filename, RealPath)) { // real_path can fail with path like "foo.c". RealFilename = Filename; } else { @@ -524,9 +529,10 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU, SmallString<128> Filename = CU->getFilename(); sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda"); StringRef FName = sys::path::filename(Filename); - SmallString<128> CurPath; - if (sys::fs::current_path(CurPath)) + ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory(); + if (!CWD) return std::string(FName); + SmallString<128> CurPath{*CWD}; sys::path::append(CurPath, FName); return std::string(CurPath); } @@ -554,7 +560,7 @@ bool GCOVProfiler::runOnModule( PreservedAnalyses GCOVProfilerPass::run(Module &M, ModuleAnalysisManager &AM) { - GCOVProfiler Profiler(GCOVOpts); + GCOVProfiler Profiler(GCOVOpts, *VFS); FunctionAnalysisManager &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); @@ -789,7 +795,7 @@ bool GCOVProfiler::emitProfileNotes( // Add the function line number to the lines of the entry block // to have a counter for the function definition. uint32_t Line = SP->getLine(); - auto Filename = getFilename(SP); + auto Filename = getFilename(SP, VFS); BranchProbabilityInfo *BPI = GetBPI(F); BlockFrequencyInfo *BFI = GetBFI(F); @@ -881,7 +887,7 @@ bool GCOVProfiler::emitProfileNotes( if (SP != getDISubprogram(Scope)) continue; - GCOVLines &Lines = Block.getFile(getFilename(Loc->getScope())); + GCOVLines &Lines = Block.getFile(getFilename(Loc->getScope(), VFS)); Lines.addLine(Loc.getLine()); } Line = 0; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 2d84b4a..216bdf4 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -84,7 +84,6 @@ #include <cstdint> #include <iterator> #include <map> -#include <numeric> #include <optional> #include <set> #include <tuple> @@ -6356,25 +6355,25 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, if (DefaultResult) { Value *ValueCompare = Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp"); - SelectInst *SelectValueInst = cast<SelectInst>(Builder.CreateSelect( - ValueCompare, ResultVector[1].first, DefaultResult, "switch.select")); - SelectValue = SelectValueInst; - if (HasBranchWeights) { + SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first, + DefaultResult, "switch.select"); + if (auto *SI = dyn_cast<SelectInst>(SelectValue); + SI && HasBranchWeights) { // We start with 3 probabilities, where the numerator is the // corresponding BranchWeights[i], and the denominator is the sum over // BranchWeights. We want the probability and negative probability of // Condition == SecondCase. assert(BranchWeights.size() == 3); - setBranchWeights(SelectValueInst, BranchWeights[2], + setBranchWeights(SI, BranchWeights[2], BranchWeights[0] + BranchWeights[1], /*IsExpected=*/false); } } Value *ValueCompare = Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp"); - SelectInst *Ret = cast<SelectInst>(Builder.CreateSelect( - ValueCompare, ResultVector[0].first, SelectValue, "switch.select")); - if (HasBranchWeights) { + Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first, + SelectValue, "switch.select"); + if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) { // We may have had a DefaultResult. Base the position of the first and // second's branch weights accordingly. Also the proability that Condition // != FirstCase needs to take that into account. @@ -6382,7 +6381,7 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, size_t FirstCasePos = (Condition != nullptr); size_t SecondCasePos = FirstCasePos + 1; uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0; - setBranchWeights(Ret, BranchWeights[FirstCasePos], + setBranchWeights(SI, BranchWeights[FirstCasePos], DefaultCase + BranchWeights[SecondCasePos], /*IsExpected=*/false); } @@ -6422,13 +6421,13 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Value *And = Builder.CreateAnd(Condition, AndMask); Value *Cmp = Builder.CreateICmpEQ( And, Constant::getIntegerValue(And->getType(), AndMask)); - SelectInst *Ret = cast<SelectInst>( - Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult)); - if (HasBranchWeights) { + Value *Ret = + Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult); + if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) { // We know there's a Default case. We base the resulting branch // weights off its probability. assert(BranchWeights.size() >= 2); - setBranchWeights(Ret, accumulate(drop_begin(BranchWeights), 0), + setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0), BranchWeights[0], /*IsExpected=*/false); } return Ret; @@ -6448,11 +6447,11 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and"); Value *Cmp = Builder.CreateICmpEQ( And, Constant::getNullValue(And->getType()), "switch.selectcmp"); - SelectInst *Ret = cast<SelectInst>( - Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult)); - if (HasBranchWeights) { + Value *Ret = + Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult); + if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) { assert(BranchWeights.size() >= 2); - setBranchWeights(Ret, accumulate(drop_begin(BranchWeights), 0), + setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0), BranchWeights[0], /*IsExpected=*/false); } return Ret; @@ -6466,11 +6465,11 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1], "switch.selectcmp.case2"); Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp"); - SelectInst *Ret = cast<SelectInst>( - Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult)); - if (HasBranchWeights) { + Value *Ret = + Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult); + if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) { assert(BranchWeights.size() >= 2); - setBranchWeights(Ret, accumulate(drop_begin(BranchWeights), 0), + setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0), BranchWeights[0], /*IsExpected=*/false); } return Ret; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 065622e..c547662 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1100,7 +1100,9 @@ class BinOpSameOpcodeHelper { // constant + x cannot be -constant - x // instead, it should be x - -constant if (Pos == 1 || - (FromOpcode == Instruction::Add && ToOpcode == Instruction::Sub)) + ((FromOpcode == Instruction::Add || FromOpcode == Instruction::Or || + FromOpcode == Instruction::Xor) && + ToOpcode == Instruction::Sub)) return SmallVector<Value *>({LHS, RHS}); return SmallVector<Value *>({RHS, LHS}); } @@ -1188,6 +1190,10 @@ public: if (CIValue.isAllOnes()) InterchangeableMask = CanBeAll; break; + case Instruction::Xor: + if (CIValue.isZero()) + InterchangeableMask = XorBIT | OrBIT | AndBIT | SubBIT | AddBIT; + break; default: if (CIValue.isZero()) InterchangeableMask = CanBeAll; diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 0ef933f..32704bd 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2487,21 +2487,31 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask)))) return false; + // Check whether this is a binary shuffle. + bool IsBinaryShuffle = !isa<UndefValue>(V1); + auto *C0 = dyn_cast<CastInst>(V0); auto *C1 = dyn_cast<CastInst>(V1); - if (!C0 || !C1) + if (!C0 || (IsBinaryShuffle && !C1)) return false; Instruction::CastOps Opcode = C0->getOpcode(); - if (C0->getSrcTy() != C1->getSrcTy()) + + // If this is allowed, foldShuffleOfCastops can get stuck in a loop + // with foldBitcastOfShuffle. Reject in favor of foldBitcastOfShuffle. + if (!IsBinaryShuffle && Opcode == Instruction::BitCast) return false; - // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds. - if (Opcode != C1->getOpcode()) { - if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value()))) - Opcode = Instruction::SExt; - else + if (IsBinaryShuffle) { + if (C0->getSrcTy() != C1->getSrcTy()) return false; + // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds. + if (Opcode != C1->getOpcode()) { + if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value()))) + Opcode = Instruction::SExt; + else + return false; + } } auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType()); @@ -2544,23 +2554,31 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { InstructionCost CostC0 = TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy, TTI::CastContextHint::None, CostKind); - InstructionCost CostC1 = - TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy, - TTI::CastContextHint::None, CostKind); - InstructionCost OldCost = CostC0 + CostC1; - OldCost += - TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy, - CastDstTy, OldMask, CostKind, 0, nullptr, {}, &I); - InstructionCost NewCost = - TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, NewShuffleDstTy, - CastSrcTy, NewMask, CostKind); + TargetTransformInfo::ShuffleKind ShuffleKind; + if (IsBinaryShuffle) + ShuffleKind = TargetTransformInfo::SK_PermuteTwoSrc; + else + ShuffleKind = TargetTransformInfo::SK_PermuteSingleSrc; + + InstructionCost OldCost = CostC0; + OldCost += TTI.getShuffleCost(ShuffleKind, ShuffleDstTy, CastDstTy, OldMask, + CostKind, 0, nullptr, {}, &I); + + InstructionCost NewCost = TTI.getShuffleCost(ShuffleKind, NewShuffleDstTy, + CastSrcTy, NewMask, CostKind); NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy, TTI::CastContextHint::None, CostKind); if (!C0->hasOneUse()) NewCost += CostC0; - if (!C1->hasOneUse()) - NewCost += CostC1; + if (IsBinaryShuffle) { + InstructionCost CostC1 = + TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy, + TTI::CastContextHint::None, CostKind); + OldCost += CostC1; + if (!C1->hasOneUse()) + NewCost += CostC1; + } LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost @@ -2568,14 +2586,20 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { if (NewCost > OldCost) return false; - Value *Shuf = Builder.CreateShuffleVector(C0->getOperand(0), - C1->getOperand(0), NewMask); + Value *Shuf; + if (IsBinaryShuffle) + Shuf = Builder.CreateShuffleVector(C0->getOperand(0), C1->getOperand(0), + NewMask); + else + Shuf = Builder.CreateShuffleVector(C0->getOperand(0), NewMask); + Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy); // Intersect flags from the old casts. if (auto *NewInst = dyn_cast<Instruction>(Cast)) { NewInst->copyIRFlags(C0); - NewInst->andIRFlags(C1); + if (IsBinaryShuffle) + NewInst->andIRFlags(C1); } Worklist.pushValue(Shuf); @@ -4433,7 +4457,7 @@ bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) { // Create new mask using difference of the two incoming masks. int MaskOffset = NewMask[0u]; - unsigned Index = (InputNumElements - MaskOffset) % InputNumElements; + unsigned Index = (InputNumElements + MaskOffset) % InputNumElements; NewMask.clear(); for (unsigned I = 0u; I < InputNumElements; ++I) { |