aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Analysis/LazyValueInfo.cpp140
-rw-r--r--llvm/lib/CAS/CMakeLists.txt5
-rw-r--r--llvm/lib/CAS/DatabaseFile.cpp123
-rw-r--r--llvm/lib/CAS/DatabaseFile.h153
-rw-r--r--llvm/lib/CAS/InMemoryCAS.cpp3
-rw-r--r--llvm/lib/CAS/OnDiskTrieRawHashMap.cpp1178
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp4
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp107
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h35
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp32
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h4
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfFile.h9
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp13
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h16
-rw-r--r--llvm/lib/CodeGen/LexicalScopes.cpp33
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp2
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp2
-rw-r--r--llvm/lib/CodeGen/LiveDebugVariables.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineVerifier.cpp20
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp9
-rw-r--r--llvm/lib/IR/Instructions.cpp6
-rw-r--r--llvm/lib/IR/ProfDataUtils.cpp7
-rw-r--r--llvm/lib/Object/OffloadBundle.cpp18
-rw-r--r--llvm/lib/Support/FileCollector.cpp5
-rw-r--r--llvm/lib/Support/Mustache.cpp18
-rw-r--r--llvm/lib/Support/SipHash.cpp11
-rw-r--r--llvm/lib/Support/VirtualFileSystem.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp4
-rw-r--r--llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp2
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp8
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.h2
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp39
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp5
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp10
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h3
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td48
-rw-r--r--llvm/lib/Target/X86/X86InstrArithmetic.td23
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td10
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp4
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h18
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp2
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp6
-rw-r--r--llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp30
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp43
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp8
-rw-r--r--llvm/lib/Transforms/Vectorize/VectorCombine.cpp70
47 files changed, 2055 insertions, 242 deletions
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 90bae77..6fb2807 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -59,6 +59,11 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LazyValueInfoWrapperPass, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
+static cl::opt<bool> PerPredRanges(
+ "lvi-per-pred-ranges", cl::Hidden, cl::init(false),
+ cl::desc("Enable tracking of ranges for a value in a block for"
+ "each block predecessor (default = false)"));
+
namespace llvm {
FunctionPass *createLazyValueInfoPass() {
return new LazyValueInfoWrapperPass();
@@ -103,6 +108,10 @@ namespace {
namespace {
using NonNullPointerSet = SmallDenseSet<AssertingVH<Value>, 2>;
+using BBLatticeElementMap =
+ SmallDenseMap<PoisoningVH<BasicBlock>, ValueLatticeElement, 4>;
+using PredecessorValueLatticeMap =
+ SmallDenseMap<AssertingVH<Value>, BBLatticeElementMap, 2>;
/// This is the cache kept by LazyValueInfo which
/// maintains information about queries across the clients' queries.
@@ -117,6 +126,10 @@ class LazyValueInfoCache {
// std::nullopt indicates that the nonnull pointers for this basic block
// block have not been computed yet.
std::optional<NonNullPointerSet> NonNullPointers;
+ // This is an extension of the above LatticeElements, caching, for each
+ // Value, a ValueLatticeElement, for each predecessor of the BB tracked by
+ // this entry.
+ std::optional<PredecessorValueLatticeMap> PredecessorLatticeElements;
};
/// Cached information per basic block.
@@ -134,8 +147,14 @@ class LazyValueInfoCache {
BlockCacheEntry *getOrCreateBlockEntry(BasicBlock *BB) {
auto It = BlockCache.find_as(BB);
- if (It == BlockCache.end())
- It = BlockCache.insert({BB, std::make_unique<BlockCacheEntry>()}).first;
+ if (It == BlockCache.end()) {
+ std::unique_ptr<BlockCacheEntry> BCE =
+ std::make_unique<BlockCacheEntry>();
+ if (PerPredRanges)
+ BCE->PredecessorLatticeElements =
+ std::make_optional<PredecessorValueLatticeMap>();
+ It = BlockCache.insert({BB, std::move(BCE)}).first;
+ }
return It->second.get();
}
@@ -161,6 +180,28 @@ public:
addValueHandle(Val);
}
+ void insertPredecessorResults(Value *Val, BasicBlock *BB,
+ BBLatticeElementMap &PredLatticeElements) {
+ BlockCacheEntry *Entry = getOrCreateBlockEntry(BB);
+
+ Entry->PredecessorLatticeElements->insert({Val, PredLatticeElements});
+
+ addValueHandle(Val);
+ }
+
+ std::optional<BBLatticeElementMap>
+ getCachedPredecessorInfo(Value *V, BasicBlock *BB) const {
+ const BlockCacheEntry *Entry = getBlockEntry(BB);
+ if (!Entry)
+ return std::nullopt;
+
+ auto LatticeIt = Entry->PredecessorLatticeElements->find_as(V);
+ if (LatticeIt == Entry->PredecessorLatticeElements->end())
+ return std::nullopt;
+
+ return LatticeIt->second;
+ }
+
std::optional<ValueLatticeElement> getCachedValueInfo(Value *V,
BasicBlock *BB) const {
const BlockCacheEntry *Entry = getBlockEntry(BB);
@@ -216,6 +257,8 @@ void LazyValueInfoCache::eraseValue(Value *V) {
Pair.second->OverDefined.erase(V);
if (Pair.second->NonNullPointers)
Pair.second->NonNullPointers->erase(V);
+ if (PerPredRanges)
+ Pair.second->PredecessorLatticeElements->erase(V);
}
auto HandleIt = ValueHandles.find_as(V);
@@ -230,6 +273,10 @@ void LVIValueHandle::deleted() {
}
void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ // Clear all when a BB is removed.
+ if (PerPredRanges)
+ for (auto &Pair : BlockCache)
+ Pair.second->PredecessorLatticeElements->clear();
BlockCache.erase(BB);
}
@@ -691,6 +738,9 @@ LazyValueInfoImpl::solveBlockValueNonLocal(Value *Val, BasicBlock *BB) {
// find a path to function entry. TODO: We should consider explicitly
// canonicalizing to make this true rather than relying on this happy
// accident.
+ std::optional<BBLatticeElementMap> PredLatticeElements;
+ if (PerPredRanges)
+ PredLatticeElements = std::make_optional<BBLatticeElementMap>();
for (BasicBlock *Pred : predecessors(BB)) {
// Skip self loops.
if (Pred == BB)
@@ -710,8 +760,13 @@ LazyValueInfoImpl::solveBlockValueNonLocal(Value *Val, BasicBlock *BB) {
<< Pred->getName() << "' (non local).\n");
return Result;
}
+ if (PerPredRanges)
+ PredLatticeElements->insert({Pred, *EdgeResult});
}
+ if (PerPredRanges)
+ TheCache.insertPredecessorResults(Val, BB, *PredLatticeElements);
+
// Return the merged value, which is more precise than 'overdefined'.
assert(!Result.isOverdefined());
return Result;
@@ -724,6 +779,9 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) {
// Loop over all of our predecessors, merging what we know from them into
// result. See the comment about the chosen traversal order in
// solveBlockValueNonLocal; the same reasoning applies here.
+ std::optional<BBLatticeElementMap> PredLatticeElements;
+ if (PerPredRanges)
+ PredLatticeElements = std::make_optional<BBLatticeElementMap>();
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PhiBB = PN->getIncomingBlock(i);
Value *PhiVal = PN->getIncomingValue(i);
@@ -746,8 +804,14 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) {
return Result;
}
+
+ if (PerPredRanges)
+ PredLatticeElements->insert({PhiBB, *EdgeResult});
}
+ if (PerPredRanges)
+ TheCache.insertPredecessorResults(PN, BB, *PredLatticeElements);
+
// Return the merged value, which is more precise than 'overdefined'.
assert(!Result.isOverdefined() && "Possible PHI in entry block?");
return Result;
@@ -1002,7 +1066,77 @@ LazyValueInfoImpl::solveBlockValueBinaryOpImpl(
const ConstantRange &LHSRange = *LHSRes;
const ConstantRange &RHSRange = *RHSRes;
- return ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
+
+ std::optional<ValueLatticeElement> MergedResult =
+ ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
+
+ if (!PerPredRanges)
+ return MergedResult;
+
+ std::optional<BBLatticeElementMap> PredLHS =
+ TheCache.getCachedPredecessorInfo(LHS, BB);
+ if (!PredLHS)
+ return MergedResult;
+ std::optional<BBLatticeElementMap> PredRHS =
+ TheCache.getCachedPredecessorInfo(RHS, BB);
+ if (!PredRHS)
+ return MergedResult;
+
+ const BBLatticeElementMap &LHSPredMap = *PredLHS;
+ const BBLatticeElementMap &RHSPredMap = *PredRHS;
+
+ BBLatticeElementMap PredLatticeElements;
+ ValueLatticeElement OverallPredResult;
+ for (auto *Pred : predecessors(BB)) {
+ auto LHSIt = LHSPredMap.find_as(Pred);
+ if (LHSIt == LHSPredMap.end())
+ return MergedResult;
+ const ValueLatticeElement &LHSFromPred = LHSIt->second;
+ std::optional<ConstantRange> LHSFromPredRes =
+ LHSFromPred.asConstantRange(LHS->getType());
+ if (!LHSFromPredRes)
+ return MergedResult;
+
+ auto RHSIt = RHSPredMap.find_as(Pred);
+ if (RHSIt == RHSPredMap.end())
+ return MergedResult;
+ const ValueLatticeElement &RHSFromPred = RHSIt->second;
+ std::optional<ConstantRange> RHSFromPredRes =
+ RHSFromPred.asConstantRange(RHS->getType());
+ if (!RHSFromPredRes)
+ return MergedResult;
+
+ const ConstantRange &LHSFromPredRange = *LHSFromPredRes;
+ const ConstantRange &RHSFromPredRange = *RHSFromPredRes;
+ std::optional<ValueLatticeElement> PredResult =
+ ValueLatticeElement::getRange(OpFn(LHSFromPredRange, RHSFromPredRange));
+ if (!PredResult)
+ return MergedResult;
+ if (PredResult->isOverdefined()) {
+ LLVM_DEBUG(
+ dbgs() << " pred BB '" << Pred->getName() << "' for BB '"
+ << BB->getName()
+ << "' overdefined. Discarding all predecessor intervals.\n");
+ return MergedResult;
+ }
+ PredLatticeElements.insert({Pred, *PredResult});
+ OverallPredResult.mergeIn(*PredResult);
+ }
+
+ // If this point is reached, all predecessors for both LHS and RHS have
+ // constant ranges previously computed. Can cache result and use the
+ // OverallPredResult;
+ TheCache.insertPredecessorResults(I, BB, PredLatticeElements);
+
+ LLVM_DEBUG(dbgs() << " Using predecessor intervals, evaluated " << *I
+ << " to: " << OverallPredResult << ".\n");
+
+ if (!MergedResult)
+ return OverallPredResult;
+
+ LLVM_DEBUG(dbgs() << " Intersecting intervals for " << *I << ": "
+ << OverallPredResult << " and " << MergedResult << ".\n");
+ return MergedResult->intersect(OverallPredResult);
}
std::optional<ValueLatticeElement>
diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index 6ed724b..7ae5f7e 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -2,14 +2,19 @@ add_llvm_component_library(LLVMCAS
ActionCache.cpp
ActionCaches.cpp
BuiltinCAS.cpp
+ DatabaseFile.cpp
InMemoryCAS.cpp
MappedFileRegionArena.cpp
ObjectStore.cpp
OnDiskCommon.cpp
+ OnDiskTrieRawHashMap.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS
+ LINK_LIBS
+ ${LLVM_PTHREAD_LIB}
+
LINK_COMPONENTS
Support
)
diff --git a/llvm/lib/CAS/DatabaseFile.cpp b/llvm/lib/CAS/DatabaseFile.cpp
new file mode 100644
index 0000000..db8ce1d
--- /dev/null
+++ b/llvm/lib/CAS/DatabaseFile.cpp
@@ -0,0 +1,123 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file implements the common abstractions for CAS database file.
+///
+//===----------------------------------------------------------------------===//
+
+#include "DatabaseFile.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+Error ondisk::createTableConfigError(std::errc ErrC, StringRef Path,
+ StringRef TableName, const Twine &Msg) {
+ return createStringError(make_error_code(ErrC),
+ Path + "[" + TableName + "]: " + Msg);
+}
+
+Error ondisk::checkTable(StringRef Label, size_t Expected, size_t Observed,
+ StringRef Path, StringRef TrieName) {
+ if (Expected == Observed)
+ return Error::success();
+ return createTableConfigError(std::errc::invalid_argument, Path, TrieName,
+ "mismatched " + Label +
+ " (expected: " + Twine(Expected) +
+ ", observed: " + Twine(Observed) + ")");
+}
+
+Expected<DatabaseFile>
+DatabaseFile::create(const Twine &Path, uint64_t Capacity,
+ function_ref<Error(DatabaseFile &)> NewDBConstructor) {
+ // Constructor for if the file doesn't exist.
+ auto NewFileConstructor = [&](MappedFileRegionArena &Alloc) -> Error {
+ if (Alloc.capacity() <
+ sizeof(Header) + sizeof(MappedFileRegionArena::Header))
+ return createTableConfigError(std::errc::argument_out_of_domain,
+ Path.str(), "datafile",
+ "Allocator too small for header");
+ (void)new (Alloc.data()) Header{getMagic(), getVersion(), {0}};
+ DatabaseFile DB(Alloc);
+ return NewDBConstructor(DB);
+ };
+
+ // Get or create the file.
+ MappedFileRegionArena Alloc;
+ if (Error E = MappedFileRegionArena::create(Path, Capacity, sizeof(Header),
+ NewFileConstructor)
+ .moveInto(Alloc))
+ return std::move(E);
+
+ return DatabaseFile::get(
+ std::make_unique<MappedFileRegionArena>(std::move(Alloc)));
+}
+
+Error DatabaseFile::addTable(TableHandle Table) {
+ assert(Table);
+ assert(&Table.getRegion() == &getRegion());
+ int64_t ExistingRootOffset = 0;
+ const int64_t NewOffset =
+ reinterpret_cast<const char *>(&Table.getHeader()) - getRegion().data();
+ if (H->RootTableOffset.compare_exchange_strong(ExistingRootOffset, NewOffset))
+ return Error::success();
+
+ // Silently ignore attempts to set the root to itself.
+ if (ExistingRootOffset == NewOffset)
+ return Error::success();
+
+ // Return an proper error message.
+ TableHandle Root(getRegion(), ExistingRootOffset);
+ if (Root.getName() == Table.getName())
+ return createStringError(
+ make_error_code(std::errc::not_supported),
+ "collision with existing table of the same name '" + Table.getName() +
+ "'");
+
+ return createStringError(make_error_code(std::errc::not_supported),
+ "cannot add new table '" + Table.getName() +
+ "'"
+ " to existing root '" +
+ Root.getName() + "'");
+}
+
+std::optional<TableHandle> DatabaseFile::findTable(StringRef Name) {
+ int64_t RootTableOffset = H->RootTableOffset.load();
+ if (!RootTableOffset)
+ return std::nullopt;
+
+ TableHandle Root(getRegion(), RootTableOffset);
+ if (Root.getName() == Name)
+ return Root;
+
+ return std::nullopt;
+}
+
+Error DatabaseFile::validate(MappedFileRegion &Region) {
+ if (Region.size() < sizeof(Header))
+ return createStringError(std::errc::invalid_argument,
+ "database: missing header");
+
+ // Check the magic and version.
+ auto *H = reinterpret_cast<Header *>(Region.data());
+ if (H->Magic != getMagic())
+ return createStringError(std::errc::invalid_argument,
+ "database: bad magic");
+ if (H->Version != getVersion())
+ return createStringError(std::errc::invalid_argument,
+ "database: wrong version");
+
+ auto *MFH = reinterpret_cast<MappedFileRegionArena::Header *>(Region.data() +
+ sizeof(Header));
+ // Check the bump-ptr, which should point past the header.
+ if (MFH->BumpPtr.load() < (int64_t)sizeof(Header))
+ return createStringError(std::errc::invalid_argument,
+ "database: corrupt bump-ptr");
+
+ return Error::success();
+}
diff --git a/llvm/lib/CAS/DatabaseFile.h b/llvm/lib/CAS/DatabaseFile.h
new file mode 100644
index 0000000..609e5f13
--- /dev/null
+++ b/llvm/lib/CAS/DatabaseFile.h
@@ -0,0 +1,153 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file declares the common interface for a DatabaseFile that is used to
+/// implement OnDiskCAS.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CAS_DATABASEFILE_H
+#define LLVM_LIB_CAS_DATABASEFILE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CAS/MappedFileRegionArena.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm::cas::ondisk {
+
+using MappedFileRegion = MappedFileRegionArena::RegionT;
+
+/// Generic handle for a table.
+///
+/// Generic table header layout:
+/// - 2-bytes: TableKind
+/// - 2-bytes: TableNameSize
+/// - 4-bytes: TableNameRelOffset (relative to header)
+class TableHandle {
+public:
+ enum class TableKind : uint16_t {
+ TrieRawHashMap = 1,
+ DataAllocator = 2,
+ };
+ struct Header {
+ TableKind Kind;
+ uint16_t NameSize;
+ int32_t NameRelOffset; ///< Relative to Header.
+ };
+
+ explicit operator bool() const { return H; }
+ const Header &getHeader() const { return *H; }
+ MappedFileRegion &getRegion() const { return *Region; }
+
+ template <class T> static void check() {
+ static_assert(
+ std::is_same<decltype(T::Header::GenericHeader), Header>::value,
+ "T::GenericHeader should be of type TableHandle::Header");
+ static_assert(offsetof(typename T::Header, GenericHeader) == 0,
+ "T::GenericHeader must be the head of T::Header");
+ }
+ template <class T> bool is() const { return T::Kind == H->Kind; }
+ template <class T> T dyn_cast() const {
+ check<T>();
+ if (is<T>())
+ return T(*Region, *reinterpret_cast<typename T::Header *>(H));
+ return T();
+ }
+ template <class T> T cast() const {
+ assert(is<T>());
+ return dyn_cast<T>();
+ }
+
+ StringRef getName() const {
+ auto *Begin = reinterpret_cast<const char *>(H) + H->NameRelOffset;
+ return StringRef(Begin, H->NameSize);
+ }
+
+ TableHandle() = default;
+ TableHandle(MappedFileRegion &Region, Header &H) : Region(&Region), H(&H) {}
+ TableHandle(MappedFileRegion &Region, intptr_t HeaderOffset)
+ : TableHandle(Region,
+ *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) {
+ }
+
+private:
+ MappedFileRegion *Region = nullptr;
+ Header *H = nullptr;
+};
+
+/// Encapsulate a database file, which:
+/// - Sets/checks magic.
+/// - Sets/checks version.
+/// - Points at an arbitrary root table.
+/// - Sets up a MappedFileRegionArena for allocation.
+///
+/// Top-level layout:
+/// - 4-bytes: Magic
+/// - 4-bytes: Version
+/// - 8-bytes: RootTableOffset (16-bits: Kind; 48-bits: Offset)
+/// - 8-bytes: BumpPtr from MappedFileRegionArena
+class DatabaseFile {
+public:
+ static constexpr uint32_t getMagic() { return 0xDA7ABA53UL; }
+ static constexpr uint32_t getVersion() { return 1UL; }
+ struct Header {
+ uint32_t Magic;
+ uint32_t Version;
+ std::atomic<int64_t> RootTableOffset;
+ };
+
+ const Header &getHeader() { return *H; }
+ MappedFileRegionArena &getAlloc() { return Alloc; }
+ MappedFileRegion &getRegion() { return Alloc.getRegion(); }
+
+ /// Add a table. This is currently not thread safe and should be called inside
+ /// NewDBConstructor.
+ Error addTable(TableHandle Table);
+
+ /// Find a table. May return null.
+ std::optional<TableHandle> findTable(StringRef Name);
+
+ /// Create the DatabaseFile at Path with Capacity.
+ static Expected<DatabaseFile>
+ create(const Twine &Path, uint64_t Capacity,
+ function_ref<Error(DatabaseFile &)> NewDBConstructor);
+
+ size_t size() const { return Alloc.size(); }
+
+private:
+ static Expected<DatabaseFile>
+ get(std::unique_ptr<MappedFileRegionArena> Alloc) {
+ if (Error E = validate(Alloc->getRegion()))
+ return std::move(E);
+ return DatabaseFile(std::move(Alloc));
+ }
+
+ static Error validate(MappedFileRegion &Region);
+
+ DatabaseFile(MappedFileRegionArena &Alloc)
+ : H(reinterpret_cast<Header *>(Alloc.data())), Alloc(Alloc) {}
+ DatabaseFile(std::unique_ptr<MappedFileRegionArena> Alloc)
+ : DatabaseFile(*Alloc) {
+ OwnedAlloc = std::move(Alloc);
+ }
+
+ Header *H = nullptr;
+ MappedFileRegionArena &Alloc;
+ std::unique_ptr<MappedFileRegionArena> OwnedAlloc;
+};
+
+Error createTableConfigError(std::errc ErrC, StringRef Path,
+ StringRef TableName, const Twine &Msg);
+
+Error checkTable(StringRef Label, size_t Expected, size_t Observed,
+ StringRef Path, StringRef TrieName);
+
+} // namespace llvm::cas::ondisk
+
+#endif
diff --git a/llvm/lib/CAS/InMemoryCAS.cpp b/llvm/lib/CAS/InMemoryCAS.cpp
index 255b89c..c63ee70d 100644
--- a/llvm/lib/CAS/InMemoryCAS.cpp
+++ b/llvm/lib/CAS/InMemoryCAS.cpp
@@ -57,6 +57,9 @@ public:
InMemoryObject() = delete;
InMemoryObject(InMemoryObject &&) = delete;
InMemoryObject(const InMemoryObject &) = delete;
+ InMemoryObject &operator=(const InMemoryObject &) = delete;
+ InMemoryObject &operator=(InMemoryObject &&) = delete;
+ virtual ~InMemoryObject() = default;
protected:
InMemoryObject(Kind K, const InMemoryIndexValueT &I) : IndexAndKind(&I, K) {}
diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
new file mode 100644
index 0000000..9b382dd7
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
@@ -0,0 +1,1178 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file Implements OnDiskTrieRawHashMap.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/OnDiskTrieRawHashMap.h"
+#include "DatabaseFile.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/TrieHashIndexGenerator.h"
+#include "llvm/CAS/MappedFileRegionArena.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+#if LLVM_ENABLE_ONDISK_CAS
+
+//===----------------------------------------------------------------------===//
+// TrieRawHashMap data structures.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class SubtrieHandle;
+class TrieRawHashMapHandle;
+class TrieVisitor;
+
+/// A value stored in the slots inside a SubTrie. A stored value can either be a
+/// subtrie (encoded after negation) which is the file offset to another
+/// subtrie, or it can be a fileset to a DataRecord.
+class SubtrieSlotValue {
+public:
+ explicit operator bool() const { return !isEmpty(); }
+ bool isEmpty() const { return !Offset; }
+ bool isData() const { return Offset > 0; }
+ bool isSubtrie() const { return Offset < 0; }
+ uint64_t asData() const {
+ assert(isData());
+ return Offset;
+ }
+ uint64_t asSubtrie() const {
+ assert(isSubtrie());
+ return -Offset;
+ }
+
+ FileOffset asSubtrieFileOffset() const { return FileOffset(asSubtrie()); }
+
+ FileOffset asDataFileOffset() const { return FileOffset(asData()); }
+
+ int64_t getRawOffset() const { return Offset; }
+
+ static SubtrieSlotValue getDataOffset(int64_t Offset) {
+ return SubtrieSlotValue(Offset);
+ }
+
+ static SubtrieSlotValue getSubtrieOffset(int64_t Offset) {
+ return SubtrieSlotValue(-Offset);
+ }
+
+ static SubtrieSlotValue getDataOffset(FileOffset Offset) {
+ return getDataOffset(Offset.get());
+ }
+
+ static SubtrieSlotValue getSubtrieOffset(FileOffset Offset) {
+ return getDataOffset(Offset.get());
+ }
+
+ static SubtrieSlotValue getFromSlot(std::atomic<int64_t> &Slot) {
+ return SubtrieSlotValue(Slot.load());
+ }
+
+ SubtrieSlotValue() = default;
+
+private:
+ friend class SubtrieHandle;
+ explicit SubtrieSlotValue(int64_t Offset) : Offset(Offset) {}
+ int64_t Offset = 0;
+};
+
+/// Subtrie layout:
+/// - 2-bytes: StartBit
+/// - 1-bytes: NumBits=lg(num-slots)
+/// - 5-bytes: 0-pad
+/// - <slots>
+class SubtrieHandle {
+public:
+ struct Header {
+ /// The bit this subtrie starts on.
+ uint16_t StartBit;
+
+ /// The number of bits this subtrie handles. It has 2^NumBits slots.
+ uint8_t NumBits;
+
+ /// 0-pad to 8B.
+ uint8_t ZeroPad1B;
+ uint32_t ZeroPad4B;
+ };
+
+ /// Slot storage:
+ /// - zero: Empty
+ /// - positive: RecordOffset
+ /// - negative: SubtrieOffset
+ using SlotT = std::atomic<int64_t>;
+
+ static int64_t getSlotsSize(uint32_t NumBits) {
+ return sizeof(int64_t) * (1u << NumBits);
+ }
+
+ static int64_t getSize(uint32_t NumBits) {
+ return sizeof(SubtrieHandle::Header) + getSlotsSize(NumBits);
+ }
+
+ int64_t getSize() const { return getSize(H->NumBits); }
+ size_t getNumSlots() const { return Slots.size(); }
+
+ SubtrieSlotValue load(size_t I) const {
+ return SubtrieSlotValue(Slots[I].load());
+ }
+ void store(size_t I, SubtrieSlotValue V) {
+ return Slots[I].store(V.getRawOffset());
+ }
+
+ void printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const;
+
+ /// Return None on success, or the existing offset on failure.
+ bool compare_exchange_strong(size_t I, SubtrieSlotValue &Expected,
+ SubtrieSlotValue New) {
+ return Slots[I].compare_exchange_strong(Expected.Offset, New.Offset);
+ }
+
+ /// Sink \p V from \p I in this subtrie down to \p NewI in a new subtrie with
+ /// \p NumSubtrieBits.
+ ///
+ /// \p UnusedSubtrie maintains a 1-item "free" list of unused subtries. If a
+ /// new subtrie is created that isn't used because of a lost race, then it If
+ /// it's already valid, it should be used instead of allocating a new one.
+ /// should be returned as an out parameter to be passed back in the future.
+ /// If it's already valid, it should be used instead of allocating a new one.
+ ///
+ /// Returns the subtrie that now lives at \p I.
+ Expected<SubtrieHandle> sink(size_t I, SubtrieSlotValue V,
+ MappedFileRegionArena &Alloc,
+ size_t NumSubtrieBits,
+ SubtrieHandle &UnusedSubtrie, size_t NewI);
+
+ /// Only safe if the subtrie is empty.
+ void reinitialize(uint32_t StartBit, uint32_t NumBits);
+
+ SubtrieSlotValue getOffset() const {
+ return SubtrieSlotValue::getSubtrieOffset(
+ reinterpret_cast<const char *>(H) - Region->data());
+ }
+
+ FileOffset getFileOffset() const { return getOffset().asSubtrieFileOffset(); }
+
+ explicit operator bool() const { return H; }
+
+ Header &getHeader() const { return *H; }
+ uint32_t getStartBit() const { return H->StartBit; }
+ uint32_t getNumBits() const { return H->NumBits; }
+
+ static Expected<SubtrieHandle> create(MappedFileRegionArena &Alloc,
+ uint32_t StartBit, uint32_t NumBits);
+
+ static SubtrieHandle getFromFileOffset(MappedFileRegion &Region,
+ FileOffset Offset) {
+ return SubtrieHandle(Region, SubtrieSlotValue::getSubtrieOffset(Offset));
+ }
+
+ SubtrieHandle() = default;
+ SubtrieHandle(MappedFileRegion &Region, Header &H)
+ : Region(&Region), H(&H), Slots(getSlots(H)) {}
+ SubtrieHandle(MappedFileRegion &Region, SubtrieSlotValue Offset)
+ : SubtrieHandle(Region, *reinterpret_cast<Header *>(
+ Region.data() + Offset.asSubtrie())) {}
+
+private:
+ MappedFileRegion *Region = nullptr;
+ Header *H = nullptr;
+ MutableArrayRef<SlotT> Slots;
+
+ static MutableArrayRef<SlotT> getSlots(Header &H) {
+ return MutableArrayRef(reinterpret_cast<SlotT *>(&H + 1), 1u << H.NumBits);
+ }
+};
+
+/// Handle for a TrieRawHashMap table.
+///
+/// TrieRawHashMap table layout:
+/// - [8-bytes: Generic table header]
+/// - 1-byte: NumSubtrieBits
+/// - 1-byte: Flags (not used yet)
+/// - 2-bytes: NumHashBits
+/// - 4-bytes: RecordDataSize (in bytes)
+/// - 8-bytes: RootTrieOffset
+/// - 8-bytes: AllocatorOffset (reserved for implementing free lists)
+/// - <name> '\0'
+///
+/// Record layout:
+/// - <hash>
+/// - <data>
+class TrieRawHashMapHandle {
+public:
+ static constexpr TableHandle::TableKind Kind =
+ TableHandle::TableKind::TrieRawHashMap;
+
+ struct Header {
+ TableHandle::Header GenericHeader;
+ uint8_t NumSubtrieBits;
+ uint8_t Flags; ///< None used yet.
+ uint16_t NumHashBits;
+ uint32_t RecordDataSize;
+ std::atomic<int64_t> RootTrieOffset;
+ std::atomic<int64_t> AllocatorOffset;
+ };
+
+ operator TableHandle() const {
+ if (!H)
+ return TableHandle();
+ return TableHandle(*Region, H->GenericHeader);
+ }
+
+ struct RecordData {
+ OnDiskTrieRawHashMap::ValueProxy Proxy;
+ SubtrieSlotValue Offset;
+ FileOffset getFileOffset() const { return Offset.asDataFileOffset(); }
+ };
+
+ enum Limits : size_t {
+ /// Seems like 65528 hash bits ought to be enough.
+ MaxNumHashBytes = UINT16_MAX >> 3,
+ MaxNumHashBits = MaxNumHashBytes << 3,
+
+ /// 2^16 bits in a trie is 65536 slots. This restricts us to a 16-bit
+ /// index. This many slots is suspicously large anyway.
+ MaxNumRootBits = 16,
+
+ /// 2^10 bits in a trie is 1024 slots. This many slots seems suspiciously
+ /// large for subtries.
+ MaxNumSubtrieBits = 10,
+ };
+
+ static constexpr size_t getNumHashBytes(size_t NumHashBits) {
+ assert(NumHashBits % 8 == 0);
+ return NumHashBits / 8;
+ }
+ static constexpr size_t getRecordSize(size_t RecordDataSize,
+ size_t NumHashBits) {
+ return RecordDataSize + getNumHashBytes(NumHashBits);
+ }
+
+ RecordData getRecord(SubtrieSlotValue Offset);
+ Expected<RecordData> createRecord(MappedFileRegionArena &Alloc,
+ ArrayRef<uint8_t> Hash);
+
+ explicit operator bool() const { return H; }
+ const Header &getHeader() const { return *H; }
+ SubtrieHandle getRoot() const;
+ Expected<SubtrieHandle> getOrCreateRoot(MappedFileRegionArena &Alloc);
+ MappedFileRegion &getRegion() const { return *Region; }
+
+ size_t getFlags() const { return H->Flags; }
+ size_t getNumSubtrieBits() const { return H->NumSubtrieBits; }
+ size_t getNumHashBits() const { return H->NumHashBits; }
+ size_t getNumHashBytes() const { return getNumHashBytes(H->NumHashBits); }
+ size_t getRecordDataSize() const { return H->RecordDataSize; }
+ size_t getRecordSize() const {
+ return getRecordSize(H->RecordDataSize, H->NumHashBits);
+ }
+
+ TrieHashIndexGenerator getIndexGen(SubtrieHandle Root,
+ ArrayRef<uint8_t> Hash) {
+ assert(Root.getStartBit() == 0);
+ assert(getNumHashBytes() == Hash.size());
+ assert(getNumHashBits() == Hash.size() * 8);
+ return TrieHashIndexGenerator{Root.getNumBits(), getNumSubtrieBits(), Hash};
+ }
+
+ static Expected<TrieRawHashMapHandle>
+ create(MappedFileRegionArena &Alloc, StringRef Name,
+ std::optional<uint64_t> NumRootBits, uint64_t NumSubtrieBits,
+ uint64_t NumHashBits, uint64_t RecordDataSize);
+
+ void
+ print(raw_ostream &OS,
+ function_ref<void(ArrayRef<char>)> PrintRecordData = nullptr) const;
+
+ Error validate(
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier) const;
+ TrieRawHashMapHandle() = default;
+ TrieRawHashMapHandle(MappedFileRegion &Region, Header &H)
+ : Region(&Region), H(&H) {}
+ TrieRawHashMapHandle(MappedFileRegion &Region, intptr_t HeaderOffset)
+ : TrieRawHashMapHandle(
+ Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) {
+ }
+
+private:
+ MappedFileRegion *Region = nullptr;
+ Header *H = nullptr;
+};
+
+} // end anonymous namespace
+
+struct OnDiskTrieRawHashMap::ImplType {
+ DatabaseFile File;
+ TrieRawHashMapHandle Trie;
+};
+
+Expected<SubtrieHandle> SubtrieHandle::create(MappedFileRegionArena &Alloc,
+ uint32_t StartBit,
+ uint32_t NumBits) {
+ assert(StartBit <= TrieRawHashMapHandle::MaxNumHashBits);
+ assert(NumBits <= UINT8_MAX);
+ assert(NumBits <= TrieRawHashMapHandle::MaxNumRootBits);
+
+ auto Mem = Alloc.allocate(getSize(NumBits));
+ if (LLVM_UNLIKELY(!Mem))
+ return Mem.takeError();
+ auto *H =
+ new (*Mem) SubtrieHandle::Header{(uint16_t)StartBit, (uint8_t)NumBits,
+ /*ZeroPad1B=*/0, /*ZeroPad4B=*/0};
+ SubtrieHandle S(Alloc.getRegion(), *H);
+ for (auto I = S.Slots.begin(), E = S.Slots.end(); I != E; ++I)
+ new (I) SlotT(0);
+ return S;
+}
+
+SubtrieHandle TrieRawHashMapHandle::getRoot() const {
+ if (int64_t Root = H->RootTrieOffset)
+ return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Root));
+ return SubtrieHandle();
+}
+
+Expected<SubtrieHandle>
+TrieRawHashMapHandle::getOrCreateRoot(MappedFileRegionArena &Alloc) {
+ assert(&Alloc.getRegion() == &getRegion());
+ if (SubtrieHandle Root = getRoot())
+ return Root;
+
+ int64_t Race = 0;
+ auto LazyRoot = SubtrieHandle::create(Alloc, 0, H->NumSubtrieBits);
+ if (LLVM_UNLIKELY(!LazyRoot))
+ return LazyRoot.takeError();
+ if (H->RootTrieOffset.compare_exchange_strong(
+ Race, LazyRoot->getOffset().asSubtrie()))
+ return *LazyRoot;
+
+ // There was a race. Return the other root.
+ //
+ // TODO: Avoid leaking the lazy root by storing it in an allocator.
+ return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Race));
+}
+
+Expected<TrieRawHashMapHandle>
+TrieRawHashMapHandle::create(MappedFileRegionArena &Alloc, StringRef Name,
+ std::optional<uint64_t> NumRootBits,
+ uint64_t NumSubtrieBits, uint64_t NumHashBits,
+ uint64_t RecordDataSize) {
+ // Allocate.
+ auto Offset = Alloc.allocateOffset(sizeof(Header) + Name.size() + 1);
+ if (LLVM_UNLIKELY(!Offset))
+ return Offset.takeError();
+
+ // Construct the header and the name.
+ assert(Name.size() <= UINT16_MAX && "Expected smaller table name");
+ assert(NumSubtrieBits <= UINT8_MAX && "Expected valid subtrie bits");
+ assert(NumHashBits <= UINT16_MAX && "Expected valid hash size");
+ assert(RecordDataSize <= UINT32_MAX && "Expected smaller table name");
+ auto *H = new (Alloc.getRegion().data() + *Offset)
+ Header{{TableHandle::TableKind::TrieRawHashMap, (uint16_t)Name.size(),
+ (uint32_t)sizeof(Header)},
+ (uint8_t)NumSubtrieBits,
+ /*Flags=*/0,
+ (uint16_t)NumHashBits,
+ (uint32_t)RecordDataSize,
+ /*RootTrieOffset=*/{0},
+ /*AllocatorOffset=*/{0}};
+ char *NameStorage = reinterpret_cast<char *>(H + 1);
+ llvm::copy(Name, NameStorage);
+ NameStorage[Name.size()] = 0;
+
+ // Construct a root trie, if requested.
+ TrieRawHashMapHandle Trie(Alloc.getRegion(), *H);
+ auto Sub = SubtrieHandle::create(Alloc, 0, *NumRootBits);
+ if (LLVM_UNLIKELY(!Sub))
+ return Sub.takeError();
+ if (NumRootBits)
+ H->RootTrieOffset = Sub->getOffset().asSubtrie();
+ return Trie;
+}
+
+TrieRawHashMapHandle::RecordData
+TrieRawHashMapHandle::getRecord(SubtrieSlotValue Offset) {
+ char *Begin = Region->data() + Offset.asData();
+ OnDiskTrieRawHashMap::ValueProxy Proxy;
+ Proxy.Data = MutableArrayRef(Begin, getRecordDataSize());
+ Proxy.Hash = ArrayRef(reinterpret_cast<const uint8_t *>(Proxy.Data.end()),
+ getNumHashBytes());
+ return RecordData{Proxy, Offset};
+}
+
+Expected<TrieRawHashMapHandle::RecordData>
+TrieRawHashMapHandle::createRecord(MappedFileRegionArena &Alloc,
+ ArrayRef<uint8_t> Hash) {
+ assert(&Alloc.getRegion() == Region);
+ assert(Hash.size() == getNumHashBytes());
+ auto Offset = Alloc.allocateOffset(getRecordSize());
+ if (LLVM_UNLIKELY(!Offset))
+ return Offset.takeError();
+
+ RecordData Record = getRecord(SubtrieSlotValue::getDataOffset(*Offset));
+ llvm::copy(Hash, const_cast<uint8_t *>(Record.Proxy.Hash.begin()));
+ return Record;
+}
+
+Expected<OnDiskTrieRawHashMap::const_pointer>
+OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const {
+ // Check alignment.
+ if (!isAligned(MappedFileRegionArena::getAlign(), Offset.get()))
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "unaligned file offset at 0x" +
+ utohexstr(Offset.get(), /*LowerCase=*/true));
+
+ // Check bounds.
+ //
+ // Note: There's no potential overflow when using \c uint64_t because Offset
+ // is in valid offset range and the record size is in \c [0,UINT32_MAX].
+ if (!validOffset(Offset) ||
+ Offset.get() + Impl->Trie.getRecordSize() > Impl->File.getAlloc().size())
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "file offset too large: 0x" +
+ utohexstr(Offset.get(), /*LowerCase=*/true));
+
+ // Looks okay...
+ TrieRawHashMapHandle::RecordData D =
+ Impl->Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset));
+ return const_pointer(D.Proxy, D.getFileOffset());
+}
+
+OnDiskTrieRawHashMap::const_pointer
+OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
+ TrieRawHashMapHandle Trie = Impl->Trie;
+ assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
+
+ SubtrieHandle S = Trie.getRoot();
+ if (!S)
+ return const_pointer();
+
+ TrieHashIndexGenerator IndexGen = Trie.getIndexGen(S, Hash);
+ size_t Index = IndexGen.next();
+ for (;;) {
+ // Try to set the content.
+ SubtrieSlotValue V = S.load(Index);
+ if (!V)
+ return const_pointer();
+
+ // Check for an exact match.
+ if (V.isData()) {
+ TrieRawHashMapHandle::RecordData D = Trie.getRecord(V);
+ return D.Proxy.Hash == Hash ? const_pointer(D.Proxy, D.getFileOffset())
+ : const_pointer();
+ }
+
+ Index = IndexGen.next();
+ S = SubtrieHandle(Trie.getRegion(), V);
+ }
+}
+
+/// Only safe if the subtrie is empty.
+void SubtrieHandle::reinitialize(uint32_t StartBit, uint32_t NumBits) {
+ assert(StartBit > H->StartBit);
+ assert(NumBits <= H->NumBits);
+ // Ideally would also assert that all slots are empty, but that's expensive.
+
+ H->StartBit = StartBit;
+ H->NumBits = NumBits;
+}
+
+Expected<OnDiskTrieRawHashMap::pointer>
+OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
+ LazyInsertOnConstructCB OnConstruct,
+ LazyInsertOnLeakCB OnLeak) {
+ TrieRawHashMapHandle Trie = Impl->Trie;
+ assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
+
+ MappedFileRegionArena &Alloc = Impl->File.getAlloc();
+ std::optional<SubtrieHandle> S;
+ auto Err = Trie.getOrCreateRoot(Alloc).moveInto(S);
+ if (LLVM_UNLIKELY(Err))
+ return std::move(Err);
+
+ TrieHashIndexGenerator IndexGen = Trie.getIndexGen(*S, Hash);
+ size_t Index = IndexGen.next();
+
+ // Walk through the hash bytes and insert into correct trie position.
+ std::optional<TrieRawHashMapHandle::RecordData> NewRecord;
+ SubtrieHandle UnusedSubtrie;
+ for (;;) {
+ SubtrieSlotValue Existing = S->load(Index);
+
+ // Try to set it, if it's empty.
+ if (!Existing) {
+ if (!NewRecord) {
+ auto Err = Trie.createRecord(Alloc, Hash).moveInto(NewRecord);
+ if (LLVM_UNLIKELY(Err))
+ return std::move(Err);
+ if (OnConstruct)
+ OnConstruct(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy);
+ }
+
+ if (S->compare_exchange_strong(Index, Existing, NewRecord->Offset))
+ return pointer(NewRecord->Proxy, NewRecord->Offset.asDataFileOffset());
+
+ // Race means that Existing is no longer empty; fall through...
+ }
+
+ if (Existing.isSubtrie()) {
+ S = SubtrieHandle(Trie.getRegion(), Existing);
+ Index = IndexGen.next();
+ continue;
+ }
+
+ // Check for an exact match.
+ TrieRawHashMapHandle::RecordData ExistingRecord = Trie.getRecord(Existing);
+ if (ExistingRecord.Proxy.Hash == Hash) {
+ if (NewRecord && OnLeak)
+ OnLeak(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy,
+ ExistingRecord.Offset.asDataFileOffset(), ExistingRecord.Proxy);
+ return pointer(ExistingRecord.Proxy,
+ ExistingRecord.Offset.asDataFileOffset());
+ }
+
+ // Sink the existing content as long as the indexes match.
+ for (;;) {
+ size_t NextIndex = IndexGen.next();
+ size_t NewIndexForExistingContent =
+ IndexGen.getCollidingBits(ExistingRecord.Proxy.Hash);
+
+ auto Err = S->sink(Index, Existing, Alloc, IndexGen.getNumBits(),
+ UnusedSubtrie, NewIndexForExistingContent)
+ .moveInto(S);
+ if (LLVM_UNLIKELY(Err))
+ return std::move(Err);
+ Index = NextIndex;
+
+ // Found the difference.
+ if (NextIndex != NewIndexForExistingContent)
+ break;
+ }
+ }
+}
+
+Expected<SubtrieHandle> SubtrieHandle::sink(size_t I, SubtrieSlotValue V,
+ MappedFileRegionArena &Alloc,
+ size_t NumSubtrieBits,
+ SubtrieHandle &UnusedSubtrie,
+ size_t NewI) {
+ std::optional<SubtrieHandle> NewS;
+ if (UnusedSubtrie) {
+ // Steal UnusedSubtrie and initialize it.
+ NewS.emplace();
+ std::swap(*NewS, UnusedSubtrie);
+ NewS->reinitialize(getStartBit() + getNumBits(), NumSubtrieBits);
+ } else {
+ // Allocate a new, empty subtrie.
+ auto Err = SubtrieHandle::create(Alloc, getStartBit() + getNumBits(),
+ NumSubtrieBits)
+ .moveInto(NewS);
+ if (LLVM_UNLIKELY(Err))
+ return std::move(Err);
+ }
+
+ NewS->store(NewI, V);
+ if (compare_exchange_strong(I, V, NewS->getOffset()))
+ return *NewS; // Success!
+
+ // Raced.
+ assert(V.isSubtrie() && "Expected racing sink() to add a subtrie");
+
+ // Wipe out the new slot so NewS can be reused and set the out parameter.
+ NewS->store(NewI, SubtrieSlotValue());
+ UnusedSubtrie = *NewS;
+
+ // Return the subtrie added by the concurrent sink() call.
+ return SubtrieHandle(Alloc.getRegion(), V);
+}
+
+void OnDiskTrieRawHashMap::print(
+ raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
+ Impl->Trie.print(OS, PrintRecordData);
+}
+
+Error OnDiskTrieRawHashMap::validate(
+ function_ref<Error(FileOffset, ConstValueProxy)> RecordVerifier) const {
+ return Impl->Trie.validate(RecordVerifier);
+}
+
+// Helper function that prints hexdigit and have a sub-byte starting position.
+static void printHexDigits(raw_ostream &OS, ArrayRef<uint8_t> Bytes,
+ size_t StartBit, size_t NumBits) {
+ assert(StartBit % 4 == 0);
+ assert(NumBits % 4 == 0);
+ for (size_t I = StartBit, E = StartBit + NumBits; I != E; I += 4) {
+ uint8_t HexPair = Bytes[I / 8];
+ uint8_t HexDigit = I % 8 == 0 ? HexPair >> 4 : HexPair & 0xf;
+ OS << hexdigit(HexDigit, /*LowerCase=*/true);
+ }
+}
+
+static void printBits(raw_ostream &OS, ArrayRef<uint8_t> Bytes, size_t StartBit,
+ size_t NumBits) {
+ assert(StartBit + NumBits <= Bytes.size() * 8u);
+ for (size_t I = StartBit, E = StartBit + NumBits; I != E; ++I) {
+ uint8_t Byte = Bytes[I / 8];
+ size_t ByteOffset = I % 8;
+ if (size_t ByteShift = 8 - ByteOffset - 1)
+ Byte >>= ByteShift;
+ OS << (Byte & 0x1 ? '1' : '0');
+ }
+}
+
+void SubtrieHandle::printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const {
+ // afb[1c:00*01110*0]def
+ size_t EndBit = getStartBit() + getNumBits();
+ size_t HashEndBit = Bytes.size() * 8u;
+
+ size_t FirstBinaryBit = getStartBit() & ~0x3u;
+ printHexDigits(OS, Bytes, 0, FirstBinaryBit);
+
+ size_t LastBinaryBit = (EndBit + 3u) & ~0x3u;
+ OS << "[";
+ printBits(OS, Bytes, FirstBinaryBit, LastBinaryBit - FirstBinaryBit);
+ OS << "]";
+
+ printHexDigits(OS, Bytes, LastBinaryBit, HashEndBit - LastBinaryBit);
+}
+
+static void appendIndexBits(std::string &Prefix, size_t Index,
+ size_t NumSlots) {
+ std::string Bits;
+ for (size_t NumBits = 1u; NumBits < NumSlots; NumBits <<= 1) {
+ Bits.push_back('0' + (Index & 0x1));
+ Index >>= 1;
+ }
+ for (char Ch : llvm::reverse(Bits))
+ Prefix += Ch;
+}
+
+static void printPrefix(raw_ostream &OS, StringRef Prefix) {
+ while (Prefix.size() >= 4) {
+ uint8_t Digit;
+ bool ErrorParsingBinary = Prefix.take_front(4).getAsInteger(2, Digit);
+ assert(!ErrorParsingBinary);
+ (void)ErrorParsingBinary;
+ OS << hexdigit(Digit, /*LowerCase=*/true);
+ Prefix = Prefix.drop_front(4);
+ }
+ if (!Prefix.empty())
+ OS << "[" << Prefix << "]";
+}
+
+LLVM_DUMP_METHOD void OnDiskTrieRawHashMap::dump() const { print(dbgs()); }
+
+static Expected<size_t> checkParameter(StringRef Label, size_t Max,
+ std::optional<size_t> Value,
+ std::optional<size_t> Default,
+ StringRef Path, StringRef TableName) {
+ assert(Value || Default);
+ assert(!Default || *Default <= Max);
+ if (!Value)
+ return *Default;
+
+ if (*Value <= Max)
+ return *Value;
+ return createTableConfigError(
+ std::errc::argument_out_of_domain, Path, TableName,
+ "invalid " + Label + ": " + Twine(*Value) + " (max: " + Twine(Max) + ")");
+}
+
+size_t OnDiskTrieRawHashMap::size() const { return Impl->File.size(); }
+size_t OnDiskTrieRawHashMap::capacity() const {
+ return Impl->File.getRegion().size();
+}
+
+Expected<OnDiskTrieRawHashMap>
+OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
+ size_t NumHashBits, uint64_t DataSize,
+ uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize,
+ std::optional<size_t> NewTableNumRootBits,
+ std::optional<size_t> NewTableNumSubtrieBits) {
+ SmallString<128> PathStorage;
+ StringRef Path = PathTwine.toStringRef(PathStorage);
+ SmallString<128> TrieNameStorage;
+ StringRef TrieName = TrieNameTwine.toStringRef(TrieNameStorage);
+
+ constexpr size_t DefaultNumRootBits = 10;
+ constexpr size_t DefaultNumSubtrieBits = 6;
+
+ size_t NumRootBits;
+ if (Error E = checkParameter(
+ "root bits", TrieRawHashMapHandle::MaxNumRootBits,
+ NewTableNumRootBits, DefaultNumRootBits, Path, TrieName)
+ .moveInto(NumRootBits))
+ return std::move(E);
+
+ size_t NumSubtrieBits;
+ if (Error E = checkParameter("subtrie bits",
+ TrieRawHashMapHandle::MaxNumSubtrieBits,
+ NewTableNumSubtrieBits, DefaultNumSubtrieBits,
+ Path, TrieName)
+ .moveInto(NumSubtrieBits))
+ return std::move(E);
+
+ size_t NumHashBytes = NumHashBits >> 3;
+ if (Error E =
+ checkParameter("hash size", TrieRawHashMapHandle::MaxNumHashBits,
+ NumHashBits, std::nullopt, Path, TrieName)
+ .takeError())
+ return std::move(E);
+ assert(NumHashBits == NumHashBytes << 3 &&
+ "Expected hash size to be byte-aligned");
+ if (NumHashBits != NumHashBytes << 3)
+ return createTableConfigError(
+ std::errc::argument_out_of_domain, Path, TrieName,
+ "invalid hash size: " + Twine(NumHashBits) + " (not byte-aligned)");
+
+ // Constructor for if the file doesn't exist.
+ auto NewDBConstructor = [&](DatabaseFile &DB) -> Error {
+ auto Trie =
+ TrieRawHashMapHandle::create(DB.getAlloc(), TrieName, NumRootBits,
+ NumSubtrieBits, NumHashBits, DataSize);
+ if (LLVM_UNLIKELY(!Trie))
+ return Trie.takeError();
+
+ return DB.addTable(*Trie);
+ };
+
+ // Get or create the file.
+ Expected<DatabaseFile> File =
+ DatabaseFile::create(Path, MaxFileSize, NewDBConstructor);
+ if (!File)
+ return File.takeError();
+
+ // Find the trie and validate it.
+ std::optional<TableHandle> Table = File->findTable(TrieName);
+ if (!Table)
+ return createTableConfigError(std::errc::argument_out_of_domain, Path,
+ TrieName, "table not found");
+ if (Error E = checkTable("table kind", (size_t)TrieRawHashMapHandle::Kind,
+ (size_t)Table->getHeader().Kind, Path, TrieName))
+ return std::move(E);
+ auto Trie = Table->cast<TrieRawHashMapHandle>();
+ assert(Trie && "Already checked the kind");
+
+ // Check the hash and data size.
+ if (Error E = checkTable("hash size", NumHashBits, Trie.getNumHashBits(),
+ Path, TrieName))
+ return std::move(E);
+ if (Error E = checkTable("data size", DataSize, Trie.getRecordDataSize(),
+ Path, TrieName))
+ return std::move(E);
+
+ // No flags supported right now. Either corrupt, or coming from a future
+ // writer.
+ if (size_t Flags = Trie.getFlags())
+ return createTableConfigError(std::errc::invalid_argument, Path, TrieName,
+ "unsupported flags: " + Twine(Flags));
+
+ // Success.
+ OnDiskTrieRawHashMap::ImplType Impl{DatabaseFile(std::move(*File)), Trie};
+ return OnDiskTrieRawHashMap(std::make_unique<ImplType>(std::move(Impl)));
+}
+
+static Error createInvalidTrieError(uint64_t Offset, const Twine &Msg) {
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "invalid trie at 0x" +
+ utohexstr(Offset, /*LowerCase=*/true) + ": " +
+ Msg);
+}
+
+//===----------------------------------------------------------------------===//
+// TrieVisitor data structures.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// A multi-threaded vistior to traverse the Trie.
+///
+/// TODO: add more sanity checks that isn't just plain data corruption. For
+/// example, some ill-formed data can be constructed to form a cycle using
+/// Sub-Tries and it can lead to inifinite loop when visiting (or inserting
+/// data).
+class TrieVisitor {
+public:
+ TrieVisitor(TrieRawHashMapHandle Trie, unsigned ThreadCount = 0,
+ unsigned ErrorLimit = 50)
+ : Trie(Trie), ErrorLimit(ErrorLimit),
+ Threads(hardware_concurrency(ThreadCount)) {}
+ virtual ~TrieVisitor() = default;
+ Error visit();
+
+private:
+ // Virtual method to implement the action when visiting a sub-trie.
+ virtual Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) {
+ return Error::success();
+ }
+
+ // Virtual method to implement the action when visiting a slot in a trie node.
+ virtual Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
+ SubtrieSlotValue Slot) {
+ return Error::success();
+ }
+
+protected:
+ TrieRawHashMapHandle Trie;
+
+private:
+ Error traverseTrieNode(SubtrieHandle Node, StringRef Prefix);
+
+ Error validateSubTrie(SubtrieHandle Node, bool IsRoot);
+
+ // Helper function to capture errors when visiting the trie nodes.
+ void addError(Error NewError) {
+ assert(NewError && "not an error");
+ std::lock_guard<std::mutex> ErrorLock(Lock);
+ if (NumError >= ErrorLimit) {
+ // Too many errors.
+ consumeError(std::move(NewError));
+ return;
+ }
+
+ if (Err)
+ Err = joinErrors(std::move(*Err), std::move(NewError));
+ else
+ Err = std::move(NewError);
+ NumError++;
+ }
+
+ bool tooManyErrors() {
+ std::lock_guard<std::mutex> ErrorLock(Lock);
+ return (bool)Err && NumError >= ErrorLimit;
+ }
+
+ const unsigned ErrorLimit;
+ std::optional<Error> Err;
+ unsigned NumError = 0;
+ std::mutex Lock;
+ DefaultThreadPool Threads;
+};
+
+/// A visitor that traverse and print the Trie.
+class TriePrinter : public TrieVisitor {
+public:
+ TriePrinter(TrieRawHashMapHandle Trie, raw_ostream &OS,
+ function_ref<void(ArrayRef<char>)> PrintRecordData)
+ : TrieVisitor(Trie, /*ThreadCount=*/1), OS(OS),
+ PrintRecordData(PrintRecordData) {}
+
+ Error printRecords() {
+ if (Records.empty())
+ return Error::success();
+
+ OS << "records\n";
+ llvm::sort(Records);
+ for (int64_t Offset : Records) {
+ TrieRawHashMapHandle::RecordData Record =
+ Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset));
+ if (auto Err = printRecord(Record))
+ return Err;
+ }
+ return Error::success();
+ }
+
+ Error printRecord(TrieRawHashMapHandle::RecordData &Record) {
+ OS << "- addr=" << (void *)Record.getFileOffset().get() << " ";
+ if (PrintRecordData) {
+ PrintRecordData(Record.Proxy.Data);
+ } else {
+ OS << "bytes=";
+ ArrayRef<uint8_t> Data(
+ reinterpret_cast<const uint8_t *>(Record.Proxy.Data.data()),
+ Record.Proxy.Data.size());
+ printHexDigits(OS, Data, 0, Data.size() * 8);
+ }
+ OS << "\n";
+ return Error::success();
+ }
+
+ Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) override {
+ if (Prefix.empty()) {
+ OS << "root";
+ } else {
+ OS << "subtrie=";
+ printPrefix(OS, Prefix);
+ }
+
+ OS << " addr="
+ << (void *)(reinterpret_cast<const char *>(&SubTrie.getHeader()) -
+ Trie.getRegion().data());
+ OS << " num-slots=" << SubTrie.getNumSlots() << "\n";
+ return Error::success();
+ }
+
+ Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
+ SubtrieSlotValue Slot) override {
+ OS << "- index=";
+ for (size_t Pad : {10, 100, 1000})
+ if (I < Pad && Subtrie.getNumSlots() >= Pad)
+ OS << "0";
+ OS << I << " ";
+ if (Slot.isSubtrie()) {
+ OS << "addr=" << (void *)Slot.asSubtrie();
+ OS << " subtrie=";
+ printPrefix(OS, Prefix);
+ OS << "\n";
+ return Error::success();
+ }
+ TrieRawHashMapHandle::RecordData Record = Trie.getRecord(Slot);
+ OS << "addr=" << (void *)Record.getFileOffset().get();
+ OS << " content=";
+ Subtrie.printHash(OS, Record.Proxy.Hash);
+ OS << "\n";
+ Records.push_back(Slot.asData());
+ return Error::success();
+ }
+
+private:
+ raw_ostream &OS;
+ function_ref<void(ArrayRef<char>)> PrintRecordData;
+ SmallVector<int64_t> Records;
+};
+
+/// TrieVerifier that adds additional verification on top of the basic visitor.
+class TrieVerifier : public TrieVisitor {
+public:
+ TrieVerifier(
+ TrieRawHashMapHandle Trie,
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier)
+ : TrieVisitor(Trie), RecordVerifier(RecordVerifier) {}
+
+private:
+ Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) final {
+ return Error::success();
+ }
+
+ Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
+ SubtrieSlotValue Slot) final {
+ if (RecordVerifier && Slot.isData()) {
+ if (!isAligned(MappedFileRegionArena::getAlign(), Slot.asData()))
+ return createInvalidTrieError(Slot.asData(), "mis-aligned data entry");
+
+ TrieRawHashMapHandle::RecordData Record =
+ Trie.getRecord(SubtrieSlotValue::getDataOffset(Slot.asData()));
+ return RecordVerifier(Slot.asDataFileOffset(),
+ OnDiskTrieRawHashMap::ConstValueProxy{
+ Record.Proxy.Hash, Record.Proxy.Data});
+ }
+ return Error::success();
+ }
+
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier;
+};
+} // namespace
+
+Error TrieVisitor::visit() {
+ auto Root = Trie.getRoot();
+ if (!Root)
+ return Error::success();
+
+ if (auto Err = validateSubTrie(Root, /*IsRoot=*/true))
+ return Err;
+
+ if (auto Err = visitSubTrie("", Root))
+ return Err;
+
+ SmallVector<SubtrieHandle> Subs;
+ SmallVector<std::string> Prefixes;
+ const size_t NumSlots = Root.getNumSlots();
+ for (size_t I = 0, E = NumSlots; I != E; ++I) {
+ SubtrieSlotValue Slot = Root.load(I);
+ if (!Slot)
+ continue;
+ uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData();
+ if (Offset >= (uint64_t)Trie.getRegion().size())
+ return createInvalidTrieError(Offset, "slot points out of bound");
+ std::string SubtriePrefix;
+ appendIndexBits(SubtriePrefix, I, NumSlots);
+ if (Slot.isSubtrie()) {
+ SubtrieHandle S(Trie.getRegion(), Slot);
+ Subs.push_back(S);
+ Prefixes.push_back(SubtriePrefix);
+ }
+ if (auto Err = visitSlot(I, Root, SubtriePrefix, Slot))
+ return Err;
+ }
+
+ for (size_t I = 0, E = Subs.size(); I != E; ++I) {
+ Threads.async(
+ [&](unsigned Idx) {
+ // Don't run if there is an error already.
+ if (tooManyErrors())
+ return;
+ if (auto Err = traverseTrieNode(Subs[Idx], Prefixes[Idx]))
+ addError(std::move(Err));
+ },
+ I);
+ }
+
+ Threads.wait();
+ if (Err)
+ return std::move(*Err);
+ return Error::success();
+}
+
+Error TrieVisitor::validateSubTrie(SubtrieHandle Node, bool IsRoot) {
+ char *Addr = reinterpret_cast<char *>(&Node.getHeader());
+ const int64_t Offset = Node.getFileOffset().get();
+ if (Addr + Node.getSize() >=
+ Trie.getRegion().data() + Trie.getRegion().size())
+ return createInvalidTrieError(Offset, "subtrie node spans out of bound");
+
+ if (!IsRoot &&
+ Node.getStartBit() + Node.getNumBits() > Trie.getNumHashBits()) {
+ return createInvalidTrieError(Offset,
+ "subtrie represents too many hash bits");
+ }
+
+ if (IsRoot) {
+ if (Node.getStartBit() != 0)
+ return createInvalidTrieError(Offset,
+ "root node doesn't start at 0 index");
+
+ return Error::success();
+ }
+
+ if (Node.getNumBits() > Trie.getNumSubtrieBits())
+ return createInvalidTrieError(Offset, "subtrie has wrong number of slots");
+
+ return Error::success();
+}
+
+Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) {
+ if (auto Err = validateSubTrie(Node, /*IsRoot=*/false))
+ return Err;
+
+ if (auto Err = visitSubTrie(Prefix, Node))
+ return Err;
+
+ SmallVector<SubtrieHandle> Subs;
+ SmallVector<std::string> Prefixes;
+ const size_t NumSlots = Node.getNumSlots();
+ for (size_t I = 0, E = NumSlots; I != E; ++I) {
+ SubtrieSlotValue Slot = Node.load(I);
+ if (!Slot)
+ continue;
+ uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData();
+ if (Offset >= (uint64_t)Trie.getRegion().size())
+ return createInvalidTrieError(Offset, "slot points out of bound");
+ std::string SubtriePrefix = Prefix.str();
+ appendIndexBits(SubtriePrefix, I, NumSlots);
+ if (Slot.isSubtrie()) {
+ SubtrieHandle S(Trie.getRegion(), Slot);
+ Subs.push_back(S);
+ Prefixes.push_back(SubtriePrefix);
+ }
+ if (auto Err = visitSlot(I, Node, SubtriePrefix, Slot))
+ return Err;
+ }
+ for (size_t I = 0, E = Subs.size(); I != E; ++I)
+ if (auto Err = traverseTrieNode(Subs[I], Prefixes[I]))
+ return Err;
+
+ return Error::success();
+}
+
+void TrieRawHashMapHandle::print(
+ raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
+ OS << "hash-num-bits=" << getNumHashBits()
+ << " hash-size=" << getNumHashBytes()
+ << " record-data-size=" << getRecordDataSize() << "\n";
+
+ TriePrinter Printer(*this, OS, PrintRecordData);
+ if (auto Err = Printer.visit())
+ OS << "error: " << toString(std::move(Err)) << "\n";
+
+ if (auto Err = Printer.printRecords())
+ OS << "error: " << toString(std::move(Err)) << "\n";
+
+ return;
+}
+
+Error TrieRawHashMapHandle::validate(
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier) const {
+ // Use the base TrieVisitor to identify the errors inside trie first.
+ TrieVisitor BasicVerifier(*this);
+ if (auto Err = BasicVerifier.visit())
+ return Err;
+
+ // If the trie data structure is sound, do a second pass to verify data and
+ // verifier function can assume the index is correct. However, there can be
+ // newly added bad entries that can still produce error.
+ TrieVerifier Verifier(*this, RecordVerifier);
+ return Verifier.visit();
+}
+
+#else // !LLVM_ENABLE_ONDISK_CAS
+
+struct OnDiskTrieRawHashMap::ImplType {};
+
+Expected<OnDiskTrieRawHashMap>
+OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
+ size_t NumHashBits, uint64_t DataSize,
+ uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize,
+ std::optional<size_t> NewTableNumRootBits,
+ std::optional<size_t> NewTableNumSubtrieBits) {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskTrieRawHashMap is not supported");
+}
+
+Expected<OnDiskTrieRawHashMap::pointer>
+OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
+ LazyInsertOnConstructCB OnConstruct,
+ LazyInsertOnLeakCB OnLeak) {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskTrieRawHashMap is not supported");
+}
+
+Expected<OnDiskTrieRawHashMap::const_pointer>
+OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskTrieRawHashMap is not supported");
+}
+
+OnDiskTrieRawHashMap::const_pointer
+OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
+ return const_pointer();
+}
+
+void OnDiskTrieRawHashMap::print(
+ raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
+}
+
+Error OnDiskTrieRawHashMap::validate(
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier) const {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskTrieRawHashMap is not supported");
+}
+
+size_t OnDiskTrieRawHashMap::size() const { return 0; }
+size_t OnDiskTrieRawHashMap::capacity() const { return 0; }
+
+#endif // LLVM_ENABLE_ONDISK_CAS
+
+OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(std::unique_ptr<ImplType> Impl)
+ : Impl(std::move(Impl)) {}
+OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(OnDiskTrieRawHashMap &&RHS) =
+ default;
+OnDiskTrieRawHashMap &
+OnDiskTrieRawHashMap::operator=(OnDiskTrieRawHashMap &&RHS) = default;
+OnDiskTrieRawHashMap::~OnDiskTrieRawHashMap() = default;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 0f3ff98..d98d180 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -105,6 +105,8 @@ DebugHandlerBase::~DebugHandlerBase() = default;
void DebugHandlerBase::beginModule(Module *M) {
if (M->debug_compile_units().empty())
Asm = nullptr;
+ else
+ LScopes.initialize(*M);
}
// Each LexicalScope has first instruction and last instruction to mark
@@ -269,7 +271,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
// Grab the lexical scopes for the function, if we don't have any of those
// then we're not going to be able to do anything.
- LScopes.initialize(*MF);
+ LScopes.scanFunction(*MF);
if (LScopes.empty()) {
beginFunctionImpl(MF);
return;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 67f526f..518121e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -537,8 +537,9 @@ void DwarfCompileUnit::addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName,
// and DW_AT_high_pc attributes. If there are global variables in this
// scope then create and insert DIEs for these variables.
DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP,
+ const Function &F,
MCSymbol *LineTableSym) {
- DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
+ DIE *SPDie = getOrCreateSubprogramDIE(SP, &F, includeMinimalInlineScopes());
SmallVector<RangeSpan, 2> BB_List;
// If basic block sections are on, ranges for each basic block section has
// to be emitted separately.
@@ -1122,9 +1123,10 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
}
DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
+ const Function &F,
LexicalScope *Scope,
MCSymbol *LineTableSym) {
- DIE &ScopeDIE = updateSubprogramScopeDIE(Sub, LineTableSym);
+ DIE &ScopeDIE = updateSubprogramScopeDIE(Sub, F, LineTableSym);
if (Scope) {
assert(!Scope->getInlinedAt());
@@ -1198,32 +1200,17 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
return ObjectPointer;
}
-void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
- LexicalScope *Scope) {
- auto *SP = cast<DISubprogram>(Scope->getScopeNode());
- if (getAbstractScopeDIEs().count(SP))
- return;
+DIE &DwarfCompileUnit::getOrCreateAbstractSubprogramDIE(
+ const DISubprogram *SP) {
+ if (auto *AbsDef = getAbstractScopeDIEs().lookup(SP))
+ return *AbsDef;
- DIE *ContextDIE;
- DwarfCompileUnit *ContextCU = this;
-
- if (includeMinimalInlineScopes())
- ContextDIE = &getUnitDie();
- // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
- // the important distinction that the debug node is not associated with the
- // DIE (since the debug node will be associated with the concrete DIE, if
- // any). It could be refactored to some common utility function.
- else if (auto *SPDecl = SP->getDeclaration()) {
- ContextDIE = &getUnitDie();
- getOrCreateSubprogramDIE(SPDecl);
- } else {
- ContextDIE = getOrCreateContextDIE(SP->getScope());
- // The scope may be shared with a subprogram that has already been
- // constructed in another CU, in which case we need to construct this
- // subprogram in the same CU.
- ContextCU = DD->lookupCU(ContextDIE->getUnitDie());
- }
+ auto [ContextDIE, ContextCU] = getOrCreateAbstractSubprogramContextDIE(SP);
+ return createAbstractSubprogramDIE(SP, ContextDIE, ContextCU);
+}
+DIE &DwarfCompileUnit::createAbstractSubprogramDIE(
+ const DISubprogram *SP, DIE *ContextDIE, DwarfCompileUnit *ContextCU) {
// Passing null as the associated node because the abstract definition
// shouldn't be found by lookup.
DIE &AbsDef = ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram,
@@ -1237,8 +1224,45 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
DD->getDwarfVersion() <= 4 ? std::optional<dwarf::Form>()
: dwarf::DW_FORM_implicit_const,
dwarf::DW_INL_inlined);
- if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, AbsDef))
- ContextCU->addDIEEntry(AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
+
+ return AbsDef;
+}
+
+std::pair<DIE *, DwarfCompileUnit *>
+DwarfCompileUnit::getOrCreateAbstractSubprogramContextDIE(
+ const DISubprogram *SP) {
+ bool Minimal = includeMinimalInlineScopes();
+ bool IgnoreScope = shouldPlaceInUnitDIE(SP, Minimal);
+ DIE *ContextDIE = getOrCreateSubprogramContextDIE(SP, IgnoreScope);
+
+ if (auto *SPDecl = SP->getDeclaration())
+ if (!Minimal)
+ getOrCreateSubprogramDIE(SPDecl, nullptr);
+
+ // The scope may be shared with a subprogram that has already been
+ // constructed in another CU, in which case we need to construct this
+ // subprogram in the same CU.
+ auto *ContextCU = IgnoreScope ? this : DD->lookupCU(ContextDIE->getUnitDie());
+
+ return std::make_pair(ContextDIE, ContextCU);
+}
+
+void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
+ LexicalScope *Scope) {
+ auto *SP = cast<DISubprogram>(Scope->getScopeNode());
+
+ // Populate subprogram DIE only once.
+ if (!getFinalizedAbstractSubprograms().insert(SP).second)
+ return;
+
+ auto [ContextDIE, ContextCU] = getOrCreateAbstractSubprogramContextDIE(SP);
+ DIE *AbsDef = getAbstractScopeDIEs().lookup(SP);
+ if (!AbsDef)
+ AbsDef = &createAbstractSubprogramDIE(SP, ContextDIE, ContextCU);
+
+ if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef))
+ ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer,
+ *ObjectPointer);
}
bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const {
@@ -1293,9 +1317,9 @@ DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const {
}
DIE &DwarfCompileUnit::constructCallSiteEntryDIE(
- DIE &ScopeDIE, const DISubprogram *CalleeSP, bool IsTail,
- const MCSymbol *PCAddr, const MCSymbol *CallAddr, unsigned CallReg,
- DIType *AllocSiteTy) {
+ DIE &ScopeDIE, const DISubprogram *CalleeSP, const Function *CalleeF,
+ bool IsTail, const MCSymbol *PCAddr, const MCSymbol *CallAddr,
+ unsigned CallReg, DIType *AllocSiteTy) {
// Insert a call site entry DIE within ScopeDIE.
DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site),
ScopeDIE, nullptr);
@@ -1305,7 +1329,7 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(
addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
MachineLocation(CallReg));
} else if (CalleeSP) {
- DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
+ DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP, CalleeF);
assert(CalleeDIE && "Could not create DIE for call site entry origin");
if (AddLinkageNamesToDeclCallOriginsForTuning(DD) &&
!CalleeSP->isDefinition() &&
@@ -1396,7 +1420,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
if (auto *AbsSPDie = getAbstractScopeDIEs().lookup(SP))
EntityDie = AbsSPDie;
else
- EntityDie = getOrCreateSubprogramDIE(SP);
+ EntityDie = getOrCreateSubprogramDIE(SP, nullptr);
} else if (auto *T = dyn_cast<DIType>(Entity))
EntityDie = getOrCreateTypeDIE(T);
else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity))
@@ -1805,3 +1829,20 @@ DIE *DwarfCompileUnit::getOrCreateContextDIE(const DIScope *Context) {
}
return DwarfUnit::getOrCreateContextDIE(Context);
}
+
+DIE *DwarfCompileUnit::getOrCreateSubprogramDIE(const DISubprogram *SP,
+ const Function *F,
+ bool Minimal) {
+ if (!F && SP->isDefinition()) {
+ F = DD->getLexicalScopes().getFunction(SP);
+
+ if (!F) {
+ // SP may belong to another CU. Determine the CU similarly
+ // to DwarfDebug::constructAbstractSubprogramScopeDIE.
+ return &DD->getOrCreateAbstractSubprogramCU(SP, *this)
+ .getOrCreateAbstractSubprogramDIE(SP);
+ }
+ }
+
+ return DwarfUnit::getOrCreateSubprogramDIE(SP, F, Minimal);
+}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index c2f6ca0..a3bbc83 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -81,6 +81,7 @@ class DwarfCompileUnit final : public DwarfUnit {
// List of abstract local scopes (either DISubprogram or DILexicalBlock).
DenseMap<const DILocalScope *, DIE *> AbstractLocalScopeDIEs;
+ SmallPtrSet<const DISubprogram *, 8> FinalizedAbstractSubprograms;
// List of inlined lexical block scopes that belong to subprograms within this
// CU.
@@ -137,12 +138,28 @@ class DwarfCompileUnit final : public DwarfUnit {
return DU->getAbstractEntities();
}
+ auto &getFinalizedAbstractSubprograms() {
+ if (isDwoUnit() && !DD->shareAcrossDWOCUs())
+ return FinalizedAbstractSubprograms;
+ return DU->getFinalizedAbstractSubprograms();
+ }
+
void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
/// Add info for Wasm-global-based relocation.
void addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName,
uint64_t GlobalIndex);
+ /// Create context DIE for abstract subprogram.
+ /// \returns The context DIE and the compile unit where abstract
+ /// DIE should be constructed.
+ std::pair<DIE *, DwarfCompileUnit *>
+ getOrCreateAbstractSubprogramContextDIE(const DISubprogram *SP);
+
+ /// Create new DIE for abstract subprogram.
+ DIE &createAbstractSubprogramDIE(const DISubprogram *SP, DIE *ContextDIE,
+ DwarfCompileUnit *ContextCU);
+
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
@@ -216,7 +233,8 @@ public:
/// DW_AT_low_pc, DW_AT_high_pc and DW_AT_LLVM_stmt_sequence attributes.
/// If there are global variables in this scope then create and insert DIEs
/// for these variables.
- DIE &updateSubprogramScopeDIE(const DISubprogram *SP, MCSymbol *LineTableSym);
+ DIE &updateSubprogramScopeDIE(const DISubprogram *SP, const Function &F,
+ MCSymbol *LineTableSym);
void constructScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE);
@@ -259,12 +277,18 @@ public:
/// This instance of 'getOrCreateContextDIE()' can handle DILocalScope.
DIE *getOrCreateContextDIE(const DIScope *Ty) override;
+ DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, const Function *F,
+ bool Minimal = false) override;
+
/// Construct a DIE for this subprogram scope.
- DIE &constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope,
- MCSymbol *LineTableSym);
+ DIE &constructSubprogramScopeDIE(const DISubprogram *Sub, const Function &F,
+ LexicalScope *Scope, MCSymbol *LineTableSym);
DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
+ /// Create an abstract subprogram DIE, that should later be populated
+ /// by \ref constructAbstractSubprogramScopeDIE.
+ DIE &getOrCreateAbstractSubprogramDIE(const DISubprogram *SP);
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
/// Whether to use the GNU analog for a DWARF5 tag, attribute, or location
@@ -281,14 +305,15 @@ public:
dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const;
/// Construct a call site entry DIE describing a call within \p Scope to a
- /// callee described by \p CalleeSP.
+ /// callee described by \p CalleeSP and \p CalleeF.
/// \p IsTail specifies whether the call is a tail call.
/// \p PCAddr points to the PC value after the call instruction.
/// \p CallAddr points to the PC value at the call instruction (or is null).
/// \p CallReg is a register location for an indirect call. For direct calls
/// the \p CallReg is set to 0.
DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP,
- bool IsTail, const MCSymbol *PCAddr,
+ const Function *CalleeF, bool IsTail,
+ const MCSymbol *PCAddr,
const MCSymbol *CallAddr, unsigned CallReg,
DIType *AllocSiteTy);
/// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 05d3d18..09d5f9c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -548,6 +548,16 @@ bool DwarfDebug::shareAcrossDWOCUs() const {
return SplitDwarfCrossCuReferences;
}
+DwarfCompileUnit &
+DwarfDebug::getOrCreateAbstractSubprogramCU(const DISubprogram *SP,
+ DwarfCompileUnit &SrcCU) {
+ auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
+ if (CU.getSkeleton())
+ return shareAcrossDWOCUs() ? CU : SrcCU;
+
+ return CU;
+}
+
void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
LexicalScope *Scope) {
assert(Scope && Scope->getScopeNode());
@@ -559,14 +569,11 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
- if (auto *SkelCU = CU.getSkeleton()) {
- (shareAcrossDWOCUs() ? CU : SrcCU)
- .constructAbstractSubprogramScopeDIE(Scope);
+ auto &TargetCU = getOrCreateAbstractSubprogramCU(SP, SrcCU);
+ TargetCU.constructAbstractSubprogramScopeDIE(Scope);
+ if (auto *SkelCU = CU.getSkeleton())
if (CU.getCUNode()->getSplitDebugInlining())
SkelCU->constructAbstractSubprogramScopeDIE(Scope);
- } else {
- CU.constructAbstractSubprogramScopeDIE(Scope);
- }
}
/// Represents a parameter whose call site value can be described by applying a
@@ -997,8 +1004,9 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
->getName(CallReg)))
<< (IsTail ? " [IsTail]" : "") << "\n");
- DIE &CallSiteDIE = CU.constructCallSiteEntryDIE(
- ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr, CallReg, AllocSiteTy);
+ DIE &CallSiteDIE =
+ CU.constructCallSiteEntryDIE(ScopeDIE, CalleeSP, CalleeDecl, IsTail,
+ PCAddr, CallAddr, CallReg, AllocSiteTy);
// Optionally emit call-site-param debug info.
if (emitDebugEntryValues()) {
@@ -2707,7 +2715,8 @@ void DwarfDebug::skippedNonDebugFunction() {
// Gather and emit post-function debug information.
void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
- const DISubprogram *SP = MF->getFunction().getSubprogram();
+ const Function &F = MF->getFunction();
+ const DISubprogram *SP = F.getSubprogram();
assert(CurFn == MF &&
"endFunction should be called with the same function as beginFunction");
@@ -2776,11 +2785,12 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
ProcessedSPNodes.insert(SP);
DIE &ScopeDIE =
- TheCU.constructSubprogramScopeDIE(SP, FnScope, FunctionLineTableLabel);
+ TheCU.constructSubprogramScopeDIE(SP, F, FnScope, FunctionLineTableLabel);
if (auto *SkelCU = TheCU.getSkeleton())
if (!LScopes.getAbstractScopesList().empty() &&
TheCU.getCUNode()->getSplitDebugInlining())
- SkelCU->constructSubprogramScopeDIE(SP, FnScope, FunctionLineTableLabel);
+ SkelCU->constructSubprogramScopeDIE(SP, F, FnScope,
+ FunctionLineTableLabel);
FunctionLineTableLabel = nullptr;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 89813dc..1a1b28a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -906,6 +906,10 @@ public:
return CUDieMap.lookup(Die);
}
+ /// Find the matching DwarfCompileUnit for the given SP referenced from SrcCU.
+ DwarfCompileUnit &getOrCreateAbstractSubprogramCU(const DISubprogram *SP,
+ DwarfCompileUnit &SrcCU);
+
unsigned getStringTypeLoc(const DIStringType *ST) const {
return StringTypeLocMap.lookup(ST);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 0fc2b91..ef1524d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -11,6 +11,7 @@
#include "DwarfStringPool.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DIE.h"
@@ -27,6 +28,7 @@ class DbgVariable;
class DbgLabel;
class DINode;
class DILocalScope;
+class DISubprogram;
class DwarfCompileUnit;
class DwarfUnit;
class LexicalScope;
@@ -94,6 +96,9 @@ class DwarfFile {
// Collection of abstract subprogram DIEs.
DenseMap<const DILocalScope *, DIE *> AbstractLocalScopeDIEs;
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
+ /// Keeps track of abstract subprograms to populate them only once.
+ // FIXME: merge creation and population of abstract scopes.
+ SmallPtrSet<const DISubprogram *, 8> FinalizedAbstractSubprograms;
/// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
/// be shared across CUs, that is why we keep the map here instead
@@ -174,6 +179,10 @@ public:
return AbstractEntities;
}
+ auto &getFinalizedAbstractSubprograms() {
+ return FinalizedAbstractSubprograms;
+ }
+
void insertDIE(const MDNode *TypeMD, DIE *Die) {
DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index d76fd0c..62fb5eb 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -573,7 +573,7 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
if (auto *NS = dyn_cast<DINamespace>(Context))
return getOrCreateNameSpace(NS);
if (auto *SP = dyn_cast<DISubprogram>(Context))
- return getOrCreateSubprogramDIE(SP);
+ return getOrCreateSubprogramDIE(SP, nullptr);
if (auto *M = dyn_cast<DIModule>(Context))
return getOrCreateModule(M);
return getDIE(Context);
@@ -1066,7 +1066,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (!Element)
continue;
if (auto *SP = dyn_cast<DISubprogram>(Element))
- getOrCreateSubprogramDIE(SP);
+ getOrCreateSubprogramDIE(SP, nullptr);
else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) {
if (DDTy->getTag() == dwarf::DW_TAG_friend) {
DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
@@ -1335,22 +1335,21 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
return &MDie;
}
-DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
+DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP,
+ const Function *FnHint, bool Minimal) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE (as is the case for member function
// declarations).
DIE *ContextDIE =
- Minimal ? &getUnitDie() : getOrCreateContextDIE(SP->getScope());
+ getOrCreateSubprogramContextDIE(SP, shouldPlaceInUnitDIE(SP, Minimal));
if (DIE *SPDie = getDIE(SP))
return SPDie;
if (auto *SPDecl = SP->getDeclaration()) {
if (!Minimal) {
- // Add subprogram definitions to the CU die directly.
- ContextDIE = &getUnitDie();
// Build the decl now to ensure it precedes the definition.
- getOrCreateSubprogramDIE(SPDecl);
+ getOrCreateSubprogramDIE(SPDecl, nullptr);
// Check whether the DIE for SP has already been created after the call
// above.
// FIXME: Should the creation of definition subprogram DIE during
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index fe05766..bb00ec3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -256,7 +256,9 @@ public:
DIE *getOrCreateNameSpace(const DINamespace *NS);
DIE *getOrCreateModule(const DIModule *M);
- DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false);
+ virtual DIE *getOrCreateSubprogramDIE(const DISubprogram *SP,
+ const Function *FnHint,
+ bool Minimal = false);
void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
bool SkipSPAttributes = false);
@@ -343,6 +345,18 @@ protected:
/// Emit the common part of the header for this unit.
void emitCommonHeader(bool UseOffsets, dwarf::UnitType UT);
+ bool shouldPlaceInUnitDIE(const DISubprogram *SP, bool Minimal) {
+ // Add subprogram declarations to the CU die directly.
+ return Minimal || SP->getDeclaration();
+ }
+
+ DIE *getOrCreateSubprogramContextDIE(const DISubprogram *SP,
+ bool IgnoreScope) {
+ if (IgnoreScope)
+ return &getUnitDie();
+ return getOrCreateContextDIE(SP->getScope());
+ }
+
private:
/// A helper to add a wide integer constant to a DIE using a block
/// form.
diff --git a/llvm/lib/CodeGen/LexicalScopes.cpp b/llvm/lib/CodeGen/LexicalScopes.cpp
index 5916f61..9fc9ac9 100644
--- a/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -36,8 +37,16 @@ using namespace llvm;
#define DEBUG_TYPE "lexicalscopes"
-/// reset - Reset the instance so that it's prepared for another function.
-void LexicalScopes::reset() {
+static bool skipUnit(const DICompileUnit *CU) {
+ return CU->getEmissionKind() == DICompileUnit::NoDebug;
+}
+
+void LexicalScopes::resetModule() {
+ FunctionMap.clear();
+ resetFunction();
+}
+
+void LexicalScopes::resetFunction() {
MF = nullptr;
CurrentFnLexicalScope = nullptr;
LexicalScopeMap.clear();
@@ -47,12 +56,19 @@ void LexicalScopes::reset() {
DominatedBlocks.clear();
}
-/// initialize - Scan machine function and constuct lexical scope nest.
-void LexicalScopes::initialize(const MachineFunction &Fn) {
- reset();
+void LexicalScopes::initialize(const Module &M) {
+ resetModule();
+ for (const Function &F : M) {
+ DISubprogram *SP = F.getSubprogram();
+ if (SP && (!SP->getUnit() || !skipUnit(SP->getUnit())))
+ FunctionMap[SP] = &F;
+ }
+}
+
+void LexicalScopes::scanFunction(const MachineFunction &Fn) {
+ resetFunction();
// Don't attempt any lexical scope creation for a NoDebug compile unit.
- if (Fn.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
- DICompileUnit::NoDebug)
+ if (skipUnit(Fn.getFunction().getSubprogram()->getUnit()))
return;
MF = &Fn;
SmallVector<InsnRange, 4> MIRanges;
@@ -143,8 +159,7 @@ LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope,
const DILocation *IA) {
if (IA) {
// Skip scopes inlined from a NoDebug compile unit.
- if (Scope->getSubprogram()->getUnit()->getEmissionKind() ==
- DICompileUnit::NoDebug)
+ if (skipUnit(Scope->getSubprogram()->getUnit()))
return getOrCreateLexicalScope(IA);
// Create an abstract scope for inlined function.
getOrCreateAbstractScope(Scope);
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index a8143bd..0037bdd 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -3721,7 +3721,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
TFI = MF.getSubtarget().getFrameLowering();
TFI->getCalleeSaves(MF, CalleeSavedRegs);
MFI = &MF.getFrameInfo();
- LS.initialize(MF);
+ LS.scanFunction(MF);
const auto &STI = MF.getSubtarget();
AdjustsStackInCalls = MFI->adjustsStack() &&
diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index 82e0c28..b9ea03f 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -2231,7 +2231,7 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF,
TFI->getCalleeSaves(MF, CalleeSavedRegs);
this->ShouldEmitDebugEntryValues = ShouldEmitDebugEntryValues;
- LS.initialize(MF);
+ LS.scanFunction(MF);
bool Changed = false;
bool OLChanged = false;
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 84e9f03..001ba52 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -1263,7 +1263,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
void LiveDebugVariables::LDVImpl::computeIntervals() {
LexicalScopes LS;
- LS.initialize(*MF);
+ LS.scanFunction(*MF);
for (const auto &UV : userValues) {
UV->computeIntervals(MF->getRegInfo(), *TRI, *LIS, LS);
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index e911ce8..1154855 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1549,7 +1549,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
report("G_BUILD_VECTOR result element type must match source type", MI);
if (DstTy.getNumElements() != MI->getNumOperands() - 1)
- report("G_BUILD_VECTOR must have an operand for each elemement", MI);
+ report("G_BUILD_VECTOR must have an operand for each element", MI);
for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
@@ -2398,11 +2398,11 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// The next two checks allow COPY between physical and virtual registers,
// when the virtual register has a scalable size and the physical register
- // has a fixed size. These checks allow COPY between *potentialy* mismatched
- // sizes. However, once RegisterBankSelection occurs, MachineVerifier should
- // be able to resolve a fixed size for the scalable vector, and at that
- // point this function will know for sure whether the sizes are mismatched
- // and correctly report a size mismatch.
+ // has a fixed size. These checks allow COPY between *potentially*
+ // mismatched sizes. However, once RegisterBankSelection occurs,
+ // MachineVerifier should be able to resolve a fixed size for the scalable
+ // vector, and at that point this function will know for sure whether the
+ // sizes are mismatched and correctly report a size mismatch.
if (SrcReg.isPhysical() && DstReg.isVirtual() && DstSize.isScalable() &&
!SrcSize.isScalable())
break;
@@ -3213,13 +3213,13 @@ struct VRegFilter {
private:
static constexpr unsigned SparseUniverseMax = 10 * 1024 * 8;
- // VRegs indexed within SparseUniverseMax are tracked by Sparse, those beyound
- // are tracked by Dense. The only purpose of the threashold and the Dense set
+ // VRegs indexed within SparseUniverseMax are tracked by Sparse, those beyond
+ // are tracked by Dense. The only purpose of the threshold and the Dense set
// is to have a reasonably growing memory usage in pathological cases (large
// number of very sparse VRegFilter instances live at the same time). In
// practice even in the worst-by-execution time cases having all elements
// tracked by Sparse (very large SparseUniverseMax scenario) tends to be more
- // space efficient than if tracked by Dense. The threashold is set to keep the
+ // space efficient than if tracked by Dense. The threshold is set to keep the
// worst-case memory usage within 2x of figures determined empirically for
// "all Dense" scenario in such worst-by-execution-time cases.
BitVector Sparse;
@@ -3459,7 +3459,7 @@ void MachineVerifier::visitMachineFunctionAfter() {
// Check live-in list of each MBB. If a register is live into MBB, check
// that the register is in regsLiveOut of each predecessor block. Since
- // this must come from a definition in the predecesssor or its live-in
+ // this must come from a definition in the predecessor or its live-in
// list, this will catch a live-through case where the predecessor does not
// have the register in its live-in list. This currently only checks
// registers that have no aliases, are not allocatable and are not
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c815686..77df4b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17983,8 +17983,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// (fsub A, 0) -> A
if (N1CFP && N1CFP->isZero()) {
- if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
- Flags.hasNoSignedZeros()) {
+ if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) {
return N0;
}
}
@@ -17997,8 +17996,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// (fsub -0.0, N1) -> -N1
if (N0CFP && N0CFP->isZero()) {
- if (N0CFP->isNegative() ||
- (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
+ if (N0CFP->isNegative() || Flags.hasNoSignedZeros()) {
// We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
// flushed to zero, unless all users treat denorms as zero (DAZ).
// FIXME: This transform will change the sign of a NaN and the behavior
@@ -18014,8 +18012,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
- if ((Options.NoSignedZerosFPMath ||
- (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+ if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
if (N0 == N1->getOperand(0))
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index daebf447..dd83168 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -2847,6 +2847,7 @@ unsigned CastInst::isEliminableCastPair(
// FPTRUNC > FloatPt n/a FloatPt n/a
// FPEXT < FloatPt n/a FloatPt n/a
// PTRTOINT n/a Pointer n/a Integral Unsigned
+ // PTRTOADDR n/a Pointer n/a Integral Unsigned
// INTTOPTR n/a Integral Unsigned Pointer n/a
// BITCAST = FirstClass n/a FirstClass n/a
// ADDRSPCST n/a Pointer n/a Pointer n/a
@@ -2878,7 +2879,7 @@ unsigned CastInst::isEliminableCastPair(
{ 99,99,99, 2, 2,99,99, 8, 2,99,99,99, 4, 0}, // FPExt |
{ 1, 0, 0,99,99, 0, 0,99,99,99,99, 7, 3, 0}, // PtrToInt |
{ 1, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // PtrToAddr |
- { 99,99,99,99,99,99,99,99,99,11,99,99,15, 0}, // IntToPtr |
+ { 99,99,99,99,99,99,99,99,99,11,11,99,15, 0}, // IntToPtr |
{ 5, 5, 5, 0, 0, 5, 5, 0, 0,16,16, 5, 1,14}, // BitCast |
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+
};
@@ -2972,7 +2973,8 @@ unsigned CastInst::isEliminableCastPair(
// zext, sext -> zext, because sext can't sign extend after zext
return Instruction::ZExt;
case 11: {
- // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize
+ // inttoptr, ptrtoint/ptrtoaddr -> bitcast if SrcSize<=PtrSize and
+ // SrcSize==DstSize
if (!MidIntPtrTy)
return 0;
unsigned PtrSize = MidIntPtrTy->getScalarSizeInBits();
diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index 5827292..99029c1 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -252,6 +252,13 @@ void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName) {
MDB.createString(PassName)}));
}
+void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F,
+ StringRef PassName) {
+ if (std::optional<Function::ProfileCount> EC = F.getEntryCount();
+ EC && EC->getCount() > 0)
+ setExplicitlyUnknownBranchWeights(I, PassName);
+}
+
void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName) {
MDBuilder MDB(F.getContext());
F.setMetadata(
diff --git a/llvm/lib/Object/OffloadBundle.cpp b/llvm/lib/Object/OffloadBundle.cpp
index 1e1042c..0dd378e 100644
--- a/llvm/lib/Object/OffloadBundle.cpp
+++ b/llvm/lib/Object/OffloadBundle.cpp
@@ -89,17 +89,17 @@ Error OffloadBundleFatBin::readEntries(StringRef Buffer,
uint64_t EntryIDSize;
StringRef EntryID;
- if (auto EC = Reader.readInteger(EntryOffset))
- return errorCodeToError(object_error::parse_failed);
+ if (Error Err = Reader.readInteger(EntryOffset))
+ return Err;
- if (auto EC = Reader.readInteger(EntrySize))
- return errorCodeToError(object_error::parse_failed);
+ if (Error Err = Reader.readInteger(EntrySize))
+ return Err;
- if (auto EC = Reader.readInteger(EntryIDSize))
- return errorCodeToError(object_error::parse_failed);
+ if (Error Err = Reader.readInteger(EntryIDSize))
+ return Err;
- if (auto EC = Reader.readFixedString(EntryID, EntryIDSize))
- return errorCodeToError(object_error::parse_failed);
+ if (Error Err = Reader.readFixedString(EntryID, EntryIDSize))
+ return Err;
auto Entry = std::make_unique<OffloadBundleEntry>(
EntryOffset + SectionOffset, EntrySize, EntryIDSize, EntryID);
@@ -125,7 +125,7 @@ OffloadBundleFatBin::create(MemoryBufferRef Buf, uint64_t SectionOffset,
// Read the Bundle Entries
Error Err = TheBundle->readEntries(Buf.getBuffer(), SectionOffset);
if (Err)
- return errorCodeToError(object_error::parse_failed);
+ return Err;
return std::unique_ptr<OffloadBundleFatBin>(TheBundle);
}
diff --git a/llvm/lib/Support/FileCollector.cpp b/llvm/lib/Support/FileCollector.cpp
index 5dc224a..1e5de2c 100644
--- a/llvm/lib/Support/FileCollector.cpp
+++ b/llvm/lib/Support/FileCollector.cpp
@@ -68,9 +68,8 @@ void FileCollector::PathCanonicalizer::updateWithRealPath(
SmallString<256> RealPath;
auto DirWithSymlink = CachedDirs.find(Directory);
if (DirWithSymlink == CachedDirs.end()) {
- // FIXME: Should this be a call to FileSystem::getRealpath(), in some
- // cases? What if there is nothing on disk?
- if (sys::fs::real_path(Directory, RealPath))
+ // FIXME: What if there is nothing on disk?
+ if (VFS->getRealPath(Directory, RealPath))
return;
CachedDirs[Directory] = std::string(RealPath);
} else {
diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp
index 686688a..8da6fdb 100644
--- a/llvm/lib/Support/Mustache.cpp
+++ b/llvm/lib/Support/Mustache.cpp
@@ -282,18 +282,15 @@ void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
// For example:
// The template string
// " \t{{#section}}A{{/section}}"
-// would be considered as having no text ahead and would be render as
+// would be considered as having no text ahead and would be render as:
// "A"
-// The exception for this is partial tag which requires us to
-// keep track of the indentation once it's rendered.
void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
Token &CurrentToken, Token::Type CurrentType) {
Token &PrevToken = Tokens[Idx - 1];
StringRef PrevTokenBody = PrevToken.TokenBody;
StringRef Unindented = PrevTokenBody.rtrim(" \r\t\v");
size_t Indentation = PrevTokenBody.size() - Unindented.size();
- if (CurrentType != Token::Type::Partial)
- PrevToken.TokenBody = Unindented.str();
+ PrevToken.TokenBody = Unindented.str();
CurrentToken.setIndentation(Indentation);
}
@@ -439,7 +436,8 @@ class AddIndentationStringStream : public raw_ostream {
public:
explicit AddIndentationStringStream(llvm::raw_ostream &WrappedStream,
size_t Indentation)
- : Indentation(Indentation), WrappedStream(WrappedStream) {
+ : Indentation(Indentation), WrappedStream(WrappedStream),
+ NeedsIndent(true) {
SetUnbuffered();
}
@@ -448,10 +446,15 @@ protected:
llvm::StringRef Data(Ptr, Size);
SmallString<0> Indent;
Indent.resize(Indentation, ' ');
+
for (char C : Data) {
+ if (NeedsIndent && C != '\n') {
+ WrappedStream << Indent;
+ NeedsIndent = false;
+ }
WrappedStream << C;
if (C == '\n')
- WrappedStream << Indent;
+ NeedsIndent = true;
}
}
@@ -460,6 +463,7 @@ protected:
private:
size_t Indentation;
llvm::raw_ostream &WrappedStream;
+ bool NeedsIndent;
};
class Parser {
diff --git a/llvm/lib/Support/SipHash.cpp b/llvm/lib/Support/SipHash.cpp
index 86dad66..382d36f 100644
--- a/llvm/lib/Support/SipHash.cpp
+++ b/llvm/lib/Support/SipHash.cpp
@@ -35,14 +35,19 @@ void llvm::getSipHash_2_4_128(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
siphash<2, 4>(In.data(), In.size(), K, Out);
}
-/// Compute an ABI-stable 16-bit hash of the given string.
-uint16_t llvm::getPointerAuthStableSipHash(StringRef Str) {
+/// Compute an ABI-stable 64-bit hash of the given string.
+uint64_t llvm::getStableSipHash(StringRef Str) {
static const uint8_t K[16] = {0xb5, 0xd4, 0xc9, 0xeb, 0x79, 0x10, 0x4a, 0x79,
0x6f, 0xec, 0x8b, 0x1b, 0x42, 0x87, 0x81, 0xd4};
uint8_t RawHashBytes[8];
getSipHash_2_4_64(arrayRefFromStringRef(Str), K, RawHashBytes);
- uint64_t RawHash = endian::read64le(RawHashBytes);
+ return endian::read64le(RawHashBytes);
+}
+
+/// Compute an ABI-stable 16-bit hash of the given string.
+uint16_t llvm::getPointerAuthStableSipHash(StringRef Str) {
+ uint64_t RawHash = getStableSipHash(Str);
// Produce a non-zero 16-bit discriminator.
uint16_t Discriminator = (RawHash % 0xFFFF) + 1;
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index cf78459..7ff62d4 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -1973,7 +1973,7 @@ private:
EC = FS->makeAbsolute(FullPath, Name);
Name = canonicalize(Name);
} else {
- EC = sys::fs::make_absolute(Name);
+ EC = FS->makeAbsolute(Name);
}
if (EC) {
assert(NameValueNode && "Name presence should be checked earlier");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 38718c4..7504f1a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -150,7 +150,10 @@ public:
if (!CVisited.insert(CII).second)
continue;
- if (CII->getParent() == II->getParent() && !IsLookThru(II))
+ // Same-BB filter must look at the *user*; and allow non-lookthrough
+ // users when the def is a PHI (loop-header pattern).
+ if (CII->getParent() == II->getParent() && !IsLookThru(CII) &&
+ !isa<PHINode>(II))
continue;
if (isOpLegal(CII))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index d9bfeae..0a59132 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -2562,7 +2562,9 @@ bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
for (Function *F : NeedsPostProcess)
Splitter.processFunction(*F);
for (Function *F : Intrinsics) {
- if (isRemovablePointerIntrinsic(F->getIntrinsicID())) {
+ // use_empty() can also occur with cases like masked load, which will
+ // have been rewritten out of the module by now but not erased.
+ if (F->use_empty() || isRemovablePointerIntrinsic(F->getIntrinsicID())) {
F->eraseFromParent();
} else {
std::optional<Function *> NewF = Intrinsic::remangleIntrinsicFunction(F);
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index cb57c43..d4d9e54 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -193,7 +193,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
// we need to invert the branch condition to jump over TrueBB when the
// condition is false.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
- CC = RISCVCC::getOppositeBranchCondition(CC);
+ CC = RISCVCC::getInverseBranchCondition(CC);
// Insert branch instruction.
BuildMI(MBB, MBBI, DL, TII->get(RISCVCC::getBrCond(CC)))
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 56db09a..6d418fd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1134,7 +1134,7 @@ unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, unsigned SelectOpc) {
}
}
-RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
+RISCVCC::CondCode RISCVCC::getInverseBranchCondition(RISCVCC::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unrecognized conditional branch");
@@ -1554,7 +1554,7 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
return Register();
};
- unsigned NewOpc = RISCVCC::getBrCond(getOppositeBranchCondition(CC));
+ unsigned NewOpc = RISCVCC::getBrCond(getInverseBranchCondition(CC));
// Might be case 1.
// Don't change 0 to 1 since we can use x0.
@@ -1801,7 +1801,7 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
// Add condition code, inverting if necessary.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
if (Invert)
- CC = RISCVCC::getOppositeBranchCondition(CC);
+ CC = RISCVCC::getInverseBranchCondition(CC);
NewMI.addImm(CC);
// Copy the false register.
@@ -3978,7 +3978,7 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
case RISCV::PseudoCCMOVGPR: {
// CCMOV can be commuted by inverting the condition.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
- CC = RISCVCC::getOppositeBranchCondition(CC);
+ CC = RISCVCC::getInverseBranchCondition(CC);
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.getOperand(3).setImm(CC);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 2bc499b..42a0c4c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -44,7 +44,7 @@ enum CondCode {
COND_INVALID
};
-CondCode getOppositeBranchCondition(CondCode);
+CondCode getInverseBranchCondition(CondCode);
unsigned getBrCond(CondCode CC, unsigned SelectOpc = 0);
} // end of namespace RISCVCC
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 563f3bb..d4124ae 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -167,6 +167,42 @@ static bool canUseShiftPair(Instruction *Inst, const APInt &Imm) {
return false;
}
+// If this is i64 AND is part of (X & -(1 << C1) & 0xffffffff) == C2 << C1),
+// DAGCombiner can convert this to (sraiw X, C1) == sext(C2) for RV64. On RV32,
+// the type will be split so only the lower 32 bits need to be compared using
+// (srai/srli X, C) == C2.
+static bool canUseShiftCmp(Instruction *Inst, const APInt &Imm) {
+ if (!Inst->hasOneUse())
+ return false;
+
+ // Look for equality comparison.
+ auto *Cmp = dyn_cast<ICmpInst>(*Inst->user_begin());
+ if (!Cmp || !Cmp->isEquality())
+ return false;
+
+ // Right hand side of comparison should be a constant.
+ auto *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
+ if (!C)
+ return false;
+
+ uint64_t Mask = Imm.getZExtValue();
+
+ // Mask should be of the form -(1 << C) in the lower 32 bits.
+ if (!isUInt<32>(Mask) || !isPowerOf2_32(-uint32_t(Mask)))
+ return false;
+
+ // Comparison constant should be a subset of Mask.
+ uint64_t CmpC = C->getZExtValue();
+ if ((CmpC & Mask) != CmpC)
+ return false;
+
+ // We'll need to sign extend the comparison constant and shift it right. Make
+ // sure the new constant can use addi/xori+seqz/snez.
+ unsigned ShiftBits = llvm::countr_zero(Mask);
+ int64_t NewCmpC = SignExtend64<32>(CmpC) >> ShiftBits;
+ return NewCmpC >= -2048 && NewCmpC <= 2048;
+}
+
InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind,
@@ -224,6 +260,9 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
canUseShiftPair(Inst, Imm))
return TTI::TCC_Free;
+ if (Inst && Idx == 1 && Imm.getBitWidth() == 64 &&
+ canUseShiftCmp(Inst, Imm))
+ return TTI::TCC_Free;
Takes12BitImm = true;
break;
case Instruction::Add:
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index b4fc8da..db85e33 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -587,7 +587,8 @@ bool SPIRVLegalizerInfo::legalizeIsFPClass(
}
if (FPClassTest PartialCheck = Mask & fcNan) {
- auto InfWithQnanBitC = buildSPIRVConstant(IntTy, Inf | QNaNBitMask);
+ auto InfWithQnanBitC =
+ buildSPIRVConstant(IntTy, std::move(Inf) | QNaNBitMask);
if (PartialCheck == fcNan) {
// isnan(V) ==> abs(V) u> int(inf)
appendToRes(
@@ -613,7 +614,7 @@ bool SPIRVLegalizerInfo::legalizeIsFPClass(
APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
auto ExpMinusOne = assignSPIRVTy(
MIRBuilder.buildSub(IntTy, Abs, buildSPIRVConstant(IntTy, ExpLSB)));
- APInt MaxExpMinusOne = ExpMask - ExpLSB;
+ APInt MaxExpMinusOne = std::move(ExpMask) - ExpLSB;
auto NormalRes = assignSPIRVTy(
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
buildSPIRVConstant(IntTy, MaxExpMinusOne)));
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e7eb67a..cd04ff5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31215,16 +31215,16 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
unsigned NumElts = VT.getVectorNumElements();
if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
- if (IsFSHR)
- std::swap(Op0, Op1);
if (IsCstSplat) {
+ if (IsFSHR)
+ std::swap(Op0, Op1);
uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
{Op0, Op1, Imm}, DAG, Subtarget);
}
- return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
+ return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT,
{Op0, Op1, Amt}, DAG, Subtarget);
}
assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
@@ -35139,8 +35139,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VALIGN)
NODE_NAME_CASE(VSHLD)
NODE_NAME_CASE(VSHRD)
- NODE_NAME_CASE(VSHLDV)
- NODE_NAME_CASE(VSHRDV)
NODE_NAME_CASE(PSHUFD)
NODE_NAME_CASE(PSHUFHW)
NODE_NAME_CASE(PSHUFLW)
@@ -45171,6 +45169,7 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
case X86ISD::Wrapper:
case X86ISD::WrapperRIP:
return true;
+ case X86ISD::INSERTPS:
case X86ISD::BLENDI:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
@@ -45241,6 +45240,7 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
case X86ISD::BLENDV:
return false;
// SSE target shuffles.
+ case X86ISD::INSERTPS:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::UNPCKL:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 8ab8c66..b55556a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -471,8 +471,7 @@ namespace llvm {
// VBMI2 Concat & Shift.
VSHLD,
VSHRD,
- VSHLDV,
- VSHRDV,
+
// Shuffle Packed Values at 128-bit granularity.
SHUF128,
MOVDDUP,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2371ed4..564810c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12300,72 +12300,76 @@ defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
// VBMI2
//===----------------------------------------------------------------------===//
-multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
let Constraints = "$src1 = $dst",
ExeDomain = VTI.ExeDomain in {
defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
- (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
+ !if(SwapLR,
+ (VTI.VT (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src3))),
+ (VTI.VT (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src3))))>,
T8, PD, EVEX, VVVV, Sched<[sched]>;
defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
- (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (VTI.LdFrag addr:$src3))))>,
+ !if(SwapLR,
+ (VTI.VT (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.LdFrag addr:$src3)))),
+ (VTI.VT (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT (VTI.LdFrag addr:$src3)))))>,
T8, PD, EVEX, VVVV,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
-multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
- : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
+ : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched, VTI> {
let Constraints = "$src1 = $dst",
ExeDomain = VTI.ExeDomain in
defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
"${src3}"#VTI.BroadcastStr#", $src2",
"$src2, ${src3}"#VTI.BroadcastStr,
- (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
+ !if(SwapLR,
+ (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.BroadcastLdFrag addr:$src3))),
+ (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT (VTI.BroadcastLdFrag addr:$src3))))>,
T8, PD, EVEX, VVVV, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
-multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
let Predicates = [HasVBMI2] in
- defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
+ defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.ZMM, VTI.info512>,
EVEX_V512;
let Predicates = [HasVBMI2, HasVLX] in {
- defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
+ defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.YMM, VTI.info256>,
EVEX_V256;
- defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
+ defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.XMM, VTI.info128>,
EVEX_V128;
}
}
-multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
let Predicates = [HasVBMI2] in
- defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
+ defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.ZMM, VTI.info512>,
EVEX_V512;
let Predicates = [HasVBMI2, HasVLX] in {
- defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
+ defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.YMM, VTI.info256>,
EVEX_V256;
- defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
+ defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.XMM, VTI.info128>,
EVEX_V128;
}
}
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
- SDNode OpNode, X86SchedWriteWidths sched> {
- defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
+ SDNode OpNode, bit SwapLR, X86SchedWriteWidths sched> {
+ defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, SwapLR, sched,
avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
- defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
+ defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, SwapLR, sched,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
- defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
+ defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, SwapLR, sched,
avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
}
@@ -12381,8 +12385,8 @@ multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
}
// Concat & Shift
-defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
-defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
+defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", fshl, 0, SchedWriteVecIMul>;
+defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", fshr, 1, SchedWriteVecIMul>;
defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index b4768590..031fdc1 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -25,18 +25,12 @@ let SchedRW = [WriteLEA] in {
[(set GR32:$dst, lea32addr:$src)]>,
OpSize32, Requires<[Not64BitMode]>;
- let Predicates = [HasNDD], isCodeGenOnly = 1 in {
- def LEA64_8r : I<0x8D, MRMSrcMem, (outs GR8:$dst), (ins lea64_8mem:$src),
- "lea{b}\t{$src|$dst}, {$dst|$src}",
- [(set GR8:$dst, lea64_iaddr:$src)]>,
- OpSize16,
- Requires<[In64BitMode]>;
-
- def LEA64_16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins lea64_16mem:$src),
- "lea{w}\t{$src|$dst}, {$dst|$src}",
- [(set GR16:$dst, lea64_iaddr:$src)]>,
- OpSize16,
- Requires<[In64BitMode]>;
+ let isCodeGenOnly = 1 in {
+ def LEA64_8r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_8mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}", []>, OpSize32;
+
+ def LEA64_16r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_16mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}", []>, OpSize32;
}
def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src),
@@ -51,6 +45,11 @@ let SchedRW = [WriteLEA] in {
[(set GR64:$dst, lea64addr:$src)]>;
} // SchedRW
+let Predicates = [HasNDD] in {
+ def : Pat<(i8 lea64_iaddr:$src), (EXTRACT_SUBREG (LEA64_8r lea64_8mem:$src), sub_8bit)>;
+ def : Pat<(i16 lea64_iaddr:$src), (EXTRACT_SUBREG (LEA64_16r lea64_16mem:$src), sub_16bit)>;
+}
+
// Pseudo instruction for lea that prevent optimizer from eliminating
// the instruction.
let SchedRW = [WriteLEA], isPseudo = true, hasSideEffects = 1 in {
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 0c20ffe..5321ecf 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -406,16 +406,6 @@ def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
def X86VShld : SDNode<"X86ISD::VSHLD", SDTShuff3OpI>;
def X86VShrd : SDNode<"X86ISD::VSHRD", SDTShuff3OpI>;
-def X86VShldv : SDNode<"X86ISD::VSHLDV",
- SDTypeProfile<1, 3, [SDTCisVec<0>,
- SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>,
- SDTCisSameAs<0,3>]>>;
-def X86VShrdv : SDNode<"X86ISD::VSHRDV",
- SDTypeProfile<1, 3, [SDTCisVec<0>,
- SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>,
- SDTCisSameAs<0,3>]>>;
def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d1ca0a6..59e103cd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -880,11 +880,11 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
// zext(bool) + C -> bool ? C + 1 : C
if (match(Op0, m_ZExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return SelectInst::Create(X, InstCombiner::AddOne(Op1C), Op1);
+ return createSelectInst(X, InstCombiner::AddOne(Op1C), Op1);
// sext(bool) + C -> bool ? C - 1 : C
if (match(Op0, m_SExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return SelectInst::Create(X, InstCombiner::SubOne(Op1C), Op1);
+ return createSelectInst(X, InstCombiner::SubOne(Op1C), Op1);
// ~X + C --> (C-1) - X
if (match(Op0, m_Not(m_Value(X)))) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 7a979c1..4f94aa2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -23,6 +23,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
@@ -62,14 +63,14 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
public InstVisitor<InstCombinerImpl, Instruction *> {
public:
InstCombinerImpl(InstructionWorklist &Worklist, BuilderTy &Builder,
- bool MinimizeSize, AAResults *AA, AssumptionCache &AC,
+ Function &F, AAResults *AA, AssumptionCache &AC,
TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
DominatorTree &DT, OptimizationRemarkEmitter &ORE,
BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI,
ProfileSummaryInfo *PSI, const DataLayout &DL,
ReversePostOrderTraversal<BasicBlock *> &RPOT)
- : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE,
- BFI, BPI, PSI, DL, RPOT) {}
+ : InstCombiner(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI, BPI,
+ PSI, DL, RPOT) {}
virtual ~InstCombinerImpl() = default;
@@ -469,6 +470,17 @@ private:
Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable,
unsigned Depth = 0);
+ SelectInst *createSelectInst(Value *C, Value *S1, Value *S2,
+ const Twine &NameStr = "",
+ InsertPosition InsertBefore = nullptr,
+ Instruction *MDFrom = nullptr) {
+ SelectInst *SI =
+ SelectInst::Create(C, S1, S2, NameStr, InsertBefore, MDFrom);
+ if (!MDFrom)
+ setExplicitlyUnknownBranchWeightsIfProfiled(*SI, F, DEBUG_TYPE);
+ return SI;
+ }
+
public:
/// Create and insert the idiom we use to indicate a block is unreachable
/// without having to rewrite the CFG from within InstCombine.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 550f095..d457e0c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1253,7 +1253,7 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
// shl (zext i1 X), C1 --> select (X, 1 << C1, 0)
if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1);
- return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty));
+ return createSelectInst(X, NewC, ConstantInt::getNullValue(Ty));
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index f0ddd5c..8fbaf68 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1735,7 +1735,7 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
Constant *Zero = ConstantInt::getNullValue(BO.getType());
Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
- return SelectInst::Create(X, TVal, FVal);
+ return createSelectInst(X, TVal, FVal);
}
static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
@@ -5934,8 +5934,8 @@ static bool combineInstructionsOverFunction(
LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
<< F.getName() << "\n");
- InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
- ORE, BFI, BPI, PSI, DL, RPOT);
+ InstCombinerImpl IC(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI,
+ BPI, PSI, DL, RPOT);
IC.MaxArraySizeForCombine = MaxArraySize;
bool MadeChangeInThisIteration = IC.prepareWorklist(F);
MadeChangeInThisIteration |= IC.run();
diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index e5bf2d1..d842275 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -35,6 +35,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Regex.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation/CFGMST.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
@@ -92,8 +93,10 @@ class GCOVFunction;
class GCOVProfiler {
public:
- GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
- GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {}
+ GCOVProfiler()
+ : GCOVProfiler(GCOVOptions::getDefault(), *vfs::getRealFileSystem()) {}
+ GCOVProfiler(const GCOVOptions &Opts, vfs::FileSystem &VFS)
+ : Options(Opts), VFS(VFS) {}
bool
runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
@@ -110,6 +113,7 @@ public:
os->write_zeros(4 - s.size() % 4);
}
void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); }
+ vfs::FileSystem &getVirtualFileSystem() const { return VFS; }
private:
// Create the .gcno files for the Module based on DebugInfo.
@@ -166,6 +170,7 @@ private:
std::vector<Regex> ExcludeRe;
DenseSet<const BasicBlock *> ExecBlocks;
StringMap<bool> InstrumentedFiles;
+ vfs::FileSystem &VFS;
};
struct BBInfo {
@@ -214,10 +219,10 @@ static StringRef getFunctionName(const DISubprogram *SP) {
/// Prefer relative paths in the coverage notes. Clang also may split
/// up absolute paths into a directory and filename component. When
/// the relative path doesn't exist, reconstruct the absolute path.
-static SmallString<128> getFilename(const DIScope *SP) {
+static SmallString<128> getFilename(const DIScope *SP, vfs::FileSystem &VFS) {
SmallString<128> Path;
StringRef RelPath = SP->getFilename();
- if (sys::fs::exists(RelPath))
+ if (VFS.exists(RelPath))
Path = RelPath;
else
sys::path::append(Path, SP->getDirectory(), SP->getFilename());
@@ -357,7 +362,7 @@ namespace {
void writeOut(uint32_t CfgChecksum) {
write(GCOV_TAG_FUNCTION);
- SmallString<128> Filename = getFilename(SP);
+ SmallString<128> Filename = getFilename(SP, P->getVirtualFileSystem());
uint32_t BlockLen = 3 + wordsOfString(getFunctionName(SP));
BlockLen += 1 + wordsOfString(Filename) + 4;
@@ -455,7 +460,7 @@ bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
if (FilterRe.empty() && ExcludeRe.empty()) {
return true;
}
- SmallString<128> Filename = getFilename(F.getSubprogram());
+ SmallString<128> Filename = getFilename(F.getSubprogram(), VFS);
auto It = InstrumentedFiles.find(Filename);
if (It != InstrumentedFiles.end()) {
return It->second;
@@ -467,7 +472,7 @@ bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
// Path can be
// /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
// such a case we must get the real_path.
- if (sys::fs::real_path(Filename, RealPath)) {
+ if (VFS.getRealPath(Filename, RealPath)) {
// real_path can fail with path like "foo.c".
RealFilename = Filename;
} else {
@@ -524,9 +529,10 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
SmallString<128> Filename = CU->getFilename();
sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
StringRef FName = sys::path::filename(Filename);
- SmallString<128> CurPath;
- if (sys::fs::current_path(CurPath))
+ ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
+ if (!CWD)
return std::string(FName);
+ SmallString<128> CurPath{*CWD};
sys::path::append(CurPath, FName);
return std::string(CurPath);
}
@@ -554,7 +560,7 @@ bool GCOVProfiler::runOnModule(
PreservedAnalyses GCOVProfilerPass::run(Module &M,
ModuleAnalysisManager &AM) {
- GCOVProfiler Profiler(GCOVOpts);
+ GCOVProfiler Profiler(GCOVOpts, *VFS);
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
@@ -789,7 +795,7 @@ bool GCOVProfiler::emitProfileNotes(
// Add the function line number to the lines of the entry block
// to have a counter for the function definition.
uint32_t Line = SP->getLine();
- auto Filename = getFilename(SP);
+ auto Filename = getFilename(SP, VFS);
BranchProbabilityInfo *BPI = GetBPI(F);
BlockFrequencyInfo *BFI = GetBFI(F);
@@ -881,7 +887,7 @@ bool GCOVProfiler::emitProfileNotes(
if (SP != getDISubprogram(Scope))
continue;
- GCOVLines &Lines = Block.getFile(getFilename(Loc->getScope()));
+ GCOVLines &Lines = Block.getFile(getFilename(Loc->getScope(), VFS));
Lines.addLine(Loc.getLine());
}
Line = 0;
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 2d84b4a..216bdf4 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -84,7 +84,6 @@
#include <cstdint>
#include <iterator>
#include <map>
-#include <numeric>
#include <optional>
#include <set>
#include <tuple>
@@ -6356,25 +6355,25 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
if (DefaultResult) {
Value *ValueCompare =
Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
- SelectInst *SelectValueInst = cast<SelectInst>(Builder.CreateSelect(
- ValueCompare, ResultVector[1].first, DefaultResult, "switch.select"));
- SelectValue = SelectValueInst;
- if (HasBranchWeights) {
+ SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
+ DefaultResult, "switch.select");
+ if (auto *SI = dyn_cast<SelectInst>(SelectValue);
+ SI && HasBranchWeights) {
// We start with 3 probabilities, where the numerator is the
// corresponding BranchWeights[i], and the denominator is the sum over
// BranchWeights. We want the probability and negative probability of
// Condition == SecondCase.
assert(BranchWeights.size() == 3);
- setBranchWeights(SelectValueInst, BranchWeights[2],
+ setBranchWeights(SI, BranchWeights[2],
BranchWeights[0] + BranchWeights[1],
/*IsExpected=*/false);
}
}
Value *ValueCompare =
Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
- SelectInst *Ret = cast<SelectInst>(Builder.CreateSelect(
- ValueCompare, ResultVector[0].first, SelectValue, "switch.select"));
- if (HasBranchWeights) {
+ Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
+ SelectValue, "switch.select");
+ if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
// We may have had a DefaultResult. Base the position of the first and
// second's branch weights accordingly. Also the proability that Condition
// != FirstCase needs to take that into account.
@@ -6382,7 +6381,7 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
size_t FirstCasePos = (Condition != nullptr);
size_t SecondCasePos = FirstCasePos + 1;
uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
- setBranchWeights(Ret, BranchWeights[FirstCasePos],
+ setBranchWeights(SI, BranchWeights[FirstCasePos],
DefaultCase + BranchWeights[SecondCasePos],
/*IsExpected=*/false);
}
@@ -6422,13 +6421,13 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
Value *And = Builder.CreateAnd(Condition, AndMask);
Value *Cmp = Builder.CreateICmpEQ(
And, Constant::getIntegerValue(And->getType(), AndMask));
- SelectInst *Ret = cast<SelectInst>(
- Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult));
- if (HasBranchWeights) {
+ Value *Ret =
+ Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
// We know there's a Default case. We base the resulting branch
// weights off its probability.
assert(BranchWeights.size() >= 2);
- setBranchWeights(Ret, accumulate(drop_begin(BranchWeights), 0),
+ setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0),
BranchWeights[0], /*IsExpected=*/false);
}
return Ret;
@@ -6448,11 +6447,11 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
Value *Cmp = Builder.CreateICmpEQ(
And, Constant::getNullValue(And->getType()), "switch.selectcmp");
- SelectInst *Ret = cast<SelectInst>(
- Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult));
- if (HasBranchWeights) {
+ Value *Ret =
+ Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
assert(BranchWeights.size() >= 2);
- setBranchWeights(Ret, accumulate(drop_begin(BranchWeights), 0),
+ setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0),
BranchWeights[0], /*IsExpected=*/false);
}
return Ret;
@@ -6466,11 +6465,11 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
"switch.selectcmp.case2");
Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
- SelectInst *Ret = cast<SelectInst>(
- Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult));
- if (HasBranchWeights) {
+ Value *Ret =
+ Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
assert(BranchWeights.size() >= 2);
- setBranchWeights(Ret, accumulate(drop_begin(BranchWeights), 0),
+ setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0),
BranchWeights[0], /*IsExpected=*/false);
}
return Ret;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 065622e..c547662 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1100,7 +1100,9 @@ class BinOpSameOpcodeHelper {
// constant + x cannot be -constant - x
// instead, it should be x - -constant
if (Pos == 1 ||
- (FromOpcode == Instruction::Add && ToOpcode == Instruction::Sub))
+ ((FromOpcode == Instruction::Add || FromOpcode == Instruction::Or ||
+ FromOpcode == Instruction::Xor) &&
+ ToOpcode == Instruction::Sub))
return SmallVector<Value *>({LHS, RHS});
return SmallVector<Value *>({RHS, LHS});
}
@@ -1188,6 +1190,10 @@ public:
if (CIValue.isAllOnes())
InterchangeableMask = CanBeAll;
break;
+ case Instruction::Xor:
+ if (CIValue.isZero())
+ InterchangeableMask = XorBIT | OrBIT | AndBIT | SubBIT | AddBIT;
+ break;
default:
if (CIValue.isZero())
InterchangeableMask = CanBeAll;
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 0ef933f..32704bd 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2487,21 +2487,31 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask))))
return false;
+ // Check whether this is a binary shuffle.
+ bool IsBinaryShuffle = !isa<UndefValue>(V1);
+
auto *C0 = dyn_cast<CastInst>(V0);
auto *C1 = dyn_cast<CastInst>(V1);
- if (!C0 || !C1)
+ if (!C0 || (IsBinaryShuffle && !C1))
return false;
Instruction::CastOps Opcode = C0->getOpcode();
- if (C0->getSrcTy() != C1->getSrcTy())
+
+ // If this is allowed, foldShuffleOfCastops can get stuck in a loop
+ // with foldBitcastOfShuffle. Reject in favor of foldBitcastOfShuffle.
+ if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
return false;
- // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds.
- if (Opcode != C1->getOpcode()) {
- if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value())))
- Opcode = Instruction::SExt;
- else
+ if (IsBinaryShuffle) {
+ if (C0->getSrcTy() != C1->getSrcTy())
return false;
+ // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds.
+ if (Opcode != C1->getOpcode()) {
+ if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value())))
+ Opcode = Instruction::SExt;
+ else
+ return false;
+ }
}
auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
@@ -2544,23 +2554,31 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
InstructionCost CostC0 =
TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy,
TTI::CastContextHint::None, CostKind);
- InstructionCost CostC1 =
- TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy,
- TTI::CastContextHint::None, CostKind);
- InstructionCost OldCost = CostC0 + CostC1;
- OldCost +=
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
- CastDstTy, OldMask, CostKind, 0, nullptr, {}, &I);
- InstructionCost NewCost =
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, NewShuffleDstTy,
- CastSrcTy, NewMask, CostKind);
+ TargetTransformInfo::ShuffleKind ShuffleKind;
+ if (IsBinaryShuffle)
+ ShuffleKind = TargetTransformInfo::SK_PermuteTwoSrc;
+ else
+ ShuffleKind = TargetTransformInfo::SK_PermuteSingleSrc;
+
+ InstructionCost OldCost = CostC0;
+ OldCost += TTI.getShuffleCost(ShuffleKind, ShuffleDstTy, CastDstTy, OldMask,
+ CostKind, 0, nullptr, {}, &I);
+
+ InstructionCost NewCost = TTI.getShuffleCost(ShuffleKind, NewShuffleDstTy,
+ CastSrcTy, NewMask, CostKind);
NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy,
TTI::CastContextHint::None, CostKind);
if (!C0->hasOneUse())
NewCost += CostC0;
- if (!C1->hasOneUse())
- NewCost += CostC1;
+ if (IsBinaryShuffle) {
+ InstructionCost CostC1 =
+ TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy,
+ TTI::CastContextHint::None, CostKind);
+ OldCost += CostC1;
+ if (!C1->hasOneUse())
+ NewCost += CostC1;
+ }
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
@@ -2568,14 +2586,20 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
if (NewCost > OldCost)
return false;
- Value *Shuf = Builder.CreateShuffleVector(C0->getOperand(0),
- C1->getOperand(0), NewMask);
+ Value *Shuf;
+ if (IsBinaryShuffle)
+ Shuf = Builder.CreateShuffleVector(C0->getOperand(0), C1->getOperand(0),
+ NewMask);
+ else
+ Shuf = Builder.CreateShuffleVector(C0->getOperand(0), NewMask);
+
Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy);
// Intersect flags from the old casts.
if (auto *NewInst = dyn_cast<Instruction>(Cast)) {
NewInst->copyIRFlags(C0);
- NewInst->andIRFlags(C1);
+ if (IsBinaryShuffle)
+ NewInst->andIRFlags(C1);
}
Worklist.pushValue(Shuf);
@@ -4433,7 +4457,7 @@ bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
// Create new mask using difference of the two incoming masks.
int MaskOffset = NewMask[0u];
- unsigned Index = (InputNumElements - MaskOffset) % InputNumElements;
+ unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
NewMask.clear();
for (unsigned I = 0u; I < InputNumElements; ++I) {